diff options
199 files changed, 7850 insertions, 3281 deletions
diff --git a/Documentation/bpf/map_sockmap.rst b/Documentation/bpf/map_sockmap.rst new file mode 100644 index 000000000000..cc92047c6630 --- /dev/null +++ b/Documentation/bpf/map_sockmap.rst @@ -0,0 +1,498 @@ +.. SPDX-License-Identifier: GPL-2.0-only +.. Copyright Red Hat + +============================================== +BPF_MAP_TYPE_SOCKMAP and BPF_MAP_TYPE_SOCKHASH +============================================== + +.. note:: + - ``BPF_MAP_TYPE_SOCKMAP`` was introduced in kernel version 4.14 + - ``BPF_MAP_TYPE_SOCKHASH`` was introduced in kernel version 4.18 + +``BPF_MAP_TYPE_SOCKMAP`` and ``BPF_MAP_TYPE_SOCKHASH`` maps can be used to +redirect skbs between sockets or to apply policy at the socket level based on +the result of a BPF (verdict) program with the help of the BPF helpers +``bpf_sk_redirect_map()``, ``bpf_sk_redirect_hash()``, +``bpf_msg_redirect_map()`` and ``bpf_msg_redirect_hash()``. + +``BPF_MAP_TYPE_SOCKMAP`` is backed by an array that uses an integer key as the +index to look up a reference to a ``struct sock``. The map values are socket +descriptors. Similarly, ``BPF_MAP_TYPE_SOCKHASH`` is a hash backed BPF map that +holds references to sockets via their socket descriptors. + +.. note:: + The value type is either __u32 or __u64; the latter (__u64) is to support + returning socket cookies to userspace. Returning the ``struct sock *`` that + the map holds to user-space is neither safe nor useful. + +These maps may have BPF programs attached to them, specifically a parser program +and a verdict program. The parser program determines how much data has been +parsed and therefore how much data needs to be queued to come to a verdict. The +verdict program is essentially the redirect program and can return a verdict +of ``__SK_DROP``, ``__SK_PASS``, or ``__SK_REDIRECT``. + +When a socket is inserted into one of these maps, its socket callbacks are +replaced and a ``struct sk_psock`` is attached to it. Additionally, this +``sk_psock`` inherits the programs that are attached to the map. + +A sock object may be in multiple maps, but can only inherit a single +parse or verdict program. If adding a sock object to a map would result +in having multiple parser programs the update will return an EBUSY error. + +The supported programs to attach to these maps are: + +.. code-block:: c + + struct sk_psock_progs { + struct bpf_prog *msg_parser; + struct bpf_prog *stream_parser; + struct bpf_prog *stream_verdict; + struct bpf_prog *skb_verdict; + }; + +.. note:: + Users are not allowed to attach ``stream_verdict`` and ``skb_verdict`` + programs to the same map. + +The attach types for the map programs are: + +- ``msg_parser`` program - ``BPF_SK_MSG_VERDICT``. +- ``stream_parser`` program - ``BPF_SK_SKB_STREAM_PARSER``. +- ``stream_verdict`` program - ``BPF_SK_SKB_STREAM_VERDICT``. +- ``skb_verdict`` program - ``BPF_SK_SKB_VERDICT``. + +There are additional helpers available to use with the parser and verdict +programs: ``bpf_msg_apply_bytes()`` and ``bpf_msg_cork_bytes()``. With +``bpf_msg_apply_bytes()`` BPF programs can tell the infrastructure how many +bytes the given verdict should apply to. The helper ``bpf_msg_cork_bytes()`` +handles a different case where a BPF program cannot reach a verdict on a msg +until it receives more bytes AND the program doesn't want to forward the packet +until it is known to be good. + +Finally, the helpers ``bpf_msg_pull_data()`` and ``bpf_msg_push_data()`` are +available to ``BPF_PROG_TYPE_SK_MSG`` BPF programs to pull in data and set the +start and end pointers to given values or to add metadata to the ``struct +sk_msg_buff *msg``. + +All these helpers will be described in more detail below. + +Usage +===== +Kernel BPF +---------- +bpf_msg_redirect_map() +^^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) + +This helper is used in programs implementing policies at the socket level. If +the message ``msg`` is allowed to pass (i.e., if the verdict BPF program +returns ``SK_PASS``), redirect it to the socket referenced by ``map`` (of type +``BPF_MAP_TYPE_SOCKMAP``) at index ``key``. Both ingress and egress interfaces +can be used for redirection. The ``BPF_F_INGRESS`` value in ``flags`` is used +to select the ingress path otherwise the egress path is selected. This is the +only flag supported for now. + +Returns ``SK_PASS`` on success, or ``SK_DROP`` on error. + +bpf_sk_redirect_map() +^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key u64 flags) + +Redirect the packet to the socket referenced by ``map`` (of type +``BPF_MAP_TYPE_SOCKMAP``) at index ``key``. Both ingress and egress interfaces +can be used for redirection. The ``BPF_F_INGRESS`` value in ``flags`` is used +to select the ingress path otherwise the egress path is selected. This is the +only flag supported for now. + +Returns ``SK_PASS`` on success, or ``SK_DROP`` on error. + +bpf_map_lookup_elem() +^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) + +socket entries of type ``struct sock *`` can be retrieved using the +``bpf_map_lookup_elem()`` helper. + +bpf_sock_map_update() +^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + +Add an entry to, or update a ``map`` referencing sockets. The ``skops`` is used +as a new value for the entry associated to ``key``. The ``flags`` argument can +be one of the following: + +- ``BPF_ANY``: Create a new element or update an existing element. +- ``BPF_NOEXIST``: Create a new element only if it did not exist. +- ``BPF_EXIST``: Update an existing element. + +If the ``map`` has BPF programs (parser and verdict), those will be inherited +by the socket being added. If the socket is already attached to BPF programs, +this results in an error. + +Returns 0 on success, or a negative error in case of failure. + +bpf_sock_hash_update() +^^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + +Add an entry to, or update a sockhash ``map`` referencing sockets. The ``skops`` +is used as a new value for the entry associated to ``key``. + +The ``flags`` argument can be one of the following: + +- ``BPF_ANY``: Create a new element or update an existing element. +- ``BPF_NOEXIST``: Create a new element only if it did not exist. +- ``BPF_EXIST``: Update an existing element. + +If the ``map`` has BPF programs (parser and verdict), those will be inherited +by the socket being added. If the socket is already attached to BPF programs, +this results in an error. + +Returns 0 on success, or a negative error in case of failure. + +bpf_msg_redirect_hash() +^^^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) + +This helper is used in programs implementing policies at the socket level. If +the message ``msg`` is allowed to pass (i.e., if the verdict BPF program returns +``SK_PASS``), redirect it to the socket referenced by ``map`` (of type +``BPF_MAP_TYPE_SOCKHASH``) using hash ``key``. Both ingress and egress +interfaces can be used for redirection. The ``BPF_F_INGRESS`` value in +``flags`` is used to select the ingress path otherwise the egress path is +selected. This is the only flag supported for now. + +Returns ``SK_PASS`` on success, or ``SK_DROP`` on error. + +bpf_sk_redirect_hash() +^^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) + +This helper is used in programs implementing policies at the skb socket level. +If the sk_buff ``skb`` is allowed to pass (i.e., if the verdict BPF program +returns ``SK_PASS``), redirect it to the socket referenced by ``map`` (of type +``BPF_MAP_TYPE_SOCKHASH``) using hash ``key``. Both ingress and egress +interfaces can be used for redirection. The ``BPF_F_INGRESS`` value in +``flags`` is used to select the ingress path otherwise the egress path is +selected. This is the only flag supported for now. + +Returns ``SK_PASS`` on success, or ``SK_DROP`` on error. + +bpf_msg_apply_bytes() +^^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) + +For socket policies, apply the verdict of the BPF program to the next (number +of ``bytes``) of message ``msg``. For example, this helper can be used in the +following cases: + +- A single ``sendmsg()`` or ``sendfile()`` system call contains multiple + logical messages that the BPF program is supposed to read and for which it + should apply a verdict. +- A BPF program only cares to read the first ``bytes`` of a ``msg``. If the + message has a large payload, then setting up and calling the BPF program + repeatedly for all bytes, even though the verdict is already known, would + create unnecessary overhead. + +Returns 0 + +bpf_msg_cork_bytes() +^^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) + +For socket policies, prevent the execution of the verdict BPF program for +message ``msg`` until the number of ``bytes`` have been accumulated. + +This can be used when one needs a specific number of bytes before a verdict can +be assigned, even if the data spans multiple ``sendmsg()`` or ``sendfile()`` +calls. + +Returns 0 + +bpf_msg_pull_data() +^^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) + +For socket policies, pull in non-linear data from user space for ``msg`` and set +pointers ``msg->data`` and ``msg->data_end`` to ``start`` and ``end`` bytes +offsets into ``msg``, respectively. + +If a program of type ``BPF_PROG_TYPE_SK_MSG`` is run on a ``msg`` it can only +parse data that the (``data``, ``data_end``) pointers have already consumed. +For ``sendmsg()`` hooks this is likely the first scatterlist element. But for +calls relying on the ``sendpage`` handler (e.g., ``sendfile()``) this will be +the range (**0**, **0**) because the data is shared with user space and by +default the objective is to avoid allowing user space to modify data while (or +after) BPF verdict is being decided. This helper can be used to pull in data +and to set the start and end pointers to given values. Data will be copied if +necessary (i.e., if data was not linear and if start and end pointers do not +point to the same chunk). + +A call to this helper is susceptible to change the underlying packet buffer. +Therefore, at load time, all checks on pointers previously done by the verifier +are invalidated and must be performed again, if the helper is used in +combination with direct packet access. + +All values for ``flags`` are reserved for future usage, and must be left at +zero. + +Returns 0 on success, or a negative error in case of failure. + +bpf_map_lookup_elem() +^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c + + void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) + +Look up a socket entry in the sockmap or sockhash map. + +Returns the socket entry associated to ``key``, or NULL if no entry was found. + +bpf_map_update_elem() +^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + +Add or update a socket entry in a sockmap or sockhash. + +The flags argument can be one of the following: + +- BPF_ANY: Create a new element or update an existing element. +- BPF_NOEXIST: Create a new element only if it did not exist. +- BPF_EXIST: Update an existing element. + +Returns 0 on success, or a negative error in case of failure. + +bpf_map_delete_elem() +^^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + long bpf_map_delete_elem(struct bpf_map *map, const void *key) + +Delete a socket entry from a sockmap or a sockhash. + +Returns 0 on success, or a negative error in case of failure. + +User space +---------- +bpf_map_update_elem() +^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags) + +Sockmap entries can be added or updated using the ``bpf_map_update_elem()`` +function. The ``key`` parameter is the index value of the sockmap array. And the +``value`` parameter is the FD value of that socket. + +Under the hood, the sockmap update function uses the socket FD value to +retrieve the associated socket and its attached psock. + +The flags argument can be one of the following: + +- BPF_ANY: Create a new element or update an existing element. +- BPF_NOEXIST: Create a new element only if it did not exist. +- BPF_EXIST: Update an existing element. + +bpf_map_lookup_elem() +^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + int bpf_map_lookup_elem(int fd, const void *key, void *value) + +Sockmap entries can be retrieved using the ``bpf_map_lookup_elem()`` function. + +.. note:: + The entry returned is a socket cookie rather than a socket itself. + +bpf_map_delete_elem() +^^^^^^^^^^^^^^^^^^^^^ +.. code-block:: c + + int bpf_map_delete_elem(int fd, const void *key) + +Sockmap entries can be deleted using the ``bpf_map_delete_elem()`` +function. + +Returns 0 on success, or negative error in case of failure. + +Examples +======== + +Kernel BPF +---------- +Several examples of the use of sockmap APIs can be found in: + +- `tools/testing/selftests/bpf/progs/test_sockmap_kern.h`_ +- `tools/testing/selftests/bpf/progs/sockmap_parse_prog.c`_ +- `tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c`_ +- `tools/testing/selftests/bpf/progs/test_sockmap_listen.c`_ +- `tools/testing/selftests/bpf/progs/test_sockmap_update.c`_ + +The following code snippet shows how to declare a sockmap. + +.. code-block:: c + + struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); + } sock_map_rx SEC(".maps"); + +The following code snippet shows a sample parser program. + +.. code-block:: c + + SEC("sk_skb/stream_parser") + int bpf_prog_parser(struct __sk_buff *skb) + { + return skb->len; + } + +The following code snippet shows a simple verdict program that interacts with a +sockmap to redirect traffic to another socket based on the local port. + +.. code-block:: c + + SEC("sk_skb/stream_verdict") + int bpf_prog_verdict(struct __sk_buff *skb) + { + __u32 lport = skb->local_port; + __u32 idx = 0; + + if (lport == 10000) + return bpf_sk_redirect_map(skb, &sock_map_rx, idx, 0); + + return SK_PASS; + } + +The following code snippet shows how to declare a sockhash map. + +.. code-block:: c + + struct socket_key { + __u32 src_ip; + __u32 dst_ip; + __u32 src_port; + __u32 dst_port; + }; + + struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 1); + __type(key, struct socket_key); + __type(value, __u64); + } sock_hash_rx SEC(".maps"); + +The following code snippet shows a simple verdict program that interacts with a +sockhash to redirect traffic to another socket based on a hash of some of the +skb parameters. + +.. code-block:: c + + static inline + void extract_socket_key(struct __sk_buff *skb, struct socket_key *key) + { + key->src_ip = skb->remote_ip4; + key->dst_ip = skb->local_ip4; + key->src_port = skb->remote_port >> 16; + key->dst_port = (bpf_htonl(skb->local_port)) >> 16; + } + + SEC("sk_skb/stream_verdict") + int bpf_prog_verdict(struct __sk_buff *skb) + { + struct socket_key key; + + extract_socket_key(skb, &key); + + return bpf_sk_redirect_hash(skb, &sock_hash_rx, &key, 0); + } + +User space +---------- +Several examples of the use of sockmap APIs can be found in: + +- `tools/testing/selftests/bpf/prog_tests/sockmap_basic.c`_ +- `tools/testing/selftests/bpf/test_sockmap.c`_ +- `tools/testing/selftests/bpf/test_maps.c`_ + +The following code sample shows how to create a sockmap, attach a parser and +verdict program, as well as add a socket entry. + +.. code-block:: c + + int create_sample_sockmap(int sock, int parse_prog_fd, int verdict_prog_fd) + { + int index = 0; + int map, err; + + map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int), sizeof(int), 1, NULL); + if (map < 0) { + fprintf(stderr, "Failed to create sockmap: %s\n", strerror(errno)); + return -1; + } + + err = bpf_prog_attach(parse_prog_fd, map, BPF_SK_SKB_STREAM_PARSER, 0); + if (err){ + fprintf(stderr, "Failed to attach_parser_prog_to_map: %s\n", strerror(errno)); + goto out; + } + + err = bpf_prog_attach(verdict_prog_fd, map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (err){ + fprintf(stderr, "Failed to attach_verdict_prog_to_map: %s\n", strerror(errno)); + goto out; + } + + err = bpf_map_update_elem(map, &index, &sock, BPF_NOEXIST); + if (err) { + fprintf(stderr, "Failed to update sockmap: %s\n", strerror(errno)); + goto out; + } + + out: + close(map); + return err; + } + +References +=========== + +- https://github.com/jrfastab/linux-kernel-xdp/commit/c89fd73cb9d2d7f3c716c3e00836f07b1aeb261f +- https://lwn.net/Articles/731133/ +- http://vger.kernel.org/lpc_net2018_talks/ktls_bpf_paper.pdf +- https://lwn.net/Articles/748628/ +- https://lore.kernel.org/bpf/20200218171023.844439-7-jakub@cloudflare.com/ + +.. _`tools/testing/selftests/bpf/progs/test_sockmap_kern.h`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +.. _`tools/testing/selftests/bpf/progs/sockmap_parse_prog.c`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c +.. _`tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c +.. _`tools/testing/selftests/bpf/prog_tests/sockmap_basic.c`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +.. _`tools/testing/selftests/bpf/test_sockmap.c`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/test_sockmap.c +.. _`tools/testing/selftests/bpf/test_maps.c`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/test_maps.c +.. _`tools/testing/selftests/bpf/progs/test_sockmap_listen.c`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/progs/test_sockmap_listen.c +.. _`tools/testing/selftests/bpf/progs/test_sockmap_update.c`: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/progs/test_sockmap_update.c diff --git a/Documentation/devicetree/bindings/net/amlogic,g12a-mdio-mux.yaml b/Documentation/devicetree/bindings/net/amlogic,g12a-mdio-mux.yaml new file mode 100644 index 000000000000..ec5c038ce6a0 --- /dev/null +++ b/Documentation/devicetree/bindings/net/amlogic,g12a-mdio-mux.yaml @@ -0,0 +1,80 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/amlogic,g12a-mdio-mux.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MDIO bus multiplexer/glue of Amlogic G12a SoC family + +description: + This is a special case of a MDIO bus multiplexer. It allows to choose between + the internal mdio bus leading to the embedded 10/100 PHY or the external + MDIO bus. + +maintainers: + - Neil Armstrong <neil.armstrong@linaro.org> + +allOf: + - $ref: mdio-mux.yaml# + +properties: + compatible: + const: amlogic,g12a-mdio-mux + + reg: + maxItems: 1 + + clocks: + items: + - description: peripheral clock + - description: platform crytal + - description: SoC 50MHz MPLL + + clock-names: + items: + - const: pclk + - const: clkin0 + - const: clkin1 + +required: + - compatible + - reg + - clocks + - clock-names + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/irq.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + mdio-multiplexer@4c000 { + compatible = "amlogic,g12a-mdio-mux"; + reg = <0x4c000 0xa4>; + clocks = <&clkc_eth_phy>, <&xtal>, <&clkc_mpll>; + clock-names = "pclk", "clkin0", "clkin1"; + mdio-parent-bus = <&mdio0>; + #address-cells = <1>; + #size-cells = <0>; + + mdio@0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + }; + + mdio@1 { + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + + ethernet-phy@8 { + compatible = "ethernet-phy-id0180.3301", + "ethernet-phy-ieee802.3-c22"; + interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>; + reg = <8>; + max-speed = <100>; + }; + }; + }; +... diff --git a/Documentation/devicetree/bindings/net/maxlinear,gpy2xx.yaml b/Documentation/devicetree/bindings/net/maxlinear,gpy2xx.yaml new file mode 100644 index 000000000000..d71fa9de2b64 --- /dev/null +++ b/Documentation/devicetree/bindings/net/maxlinear,gpy2xx.yaml @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/maxlinear,gpy2xx.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MaxLinear GPY2xx PHY + +maintainers: + - Andrew Lunn <andrew@lunn.ch> + - Michael Walle <michael@walle.cc> + +allOf: + - $ref: ethernet-phy.yaml# + +properties: + maxlinear,use-broken-interrupts: + description: | + Interrupts are broken on some GPY2xx PHYs in that they keep the + interrupt line asserted even after the interrupt status register is + cleared. Thus it is blocking the interrupt line which is usually bad + for shared lines. By default interrupts are disabled for this PHY and + polling mode is used. If one can live with the consequences, this + property can be used to enable interrupt handling. + + Affected PHYs (as far as known) are GPY215B and GPY215C. + type: boolean + +dependencies: + maxlinear,use-broken-interrupts: [ interrupts ] + +unevaluatedProperties: false + +examples: + - | + ethernet { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-phy@0 { + reg = <0>; + interrupts-extended = <&intc 0>; + maxlinear,use-broken-interrupts; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/net/mdio-mux-meson-g12a.txt b/Documentation/devicetree/bindings/net/mdio-mux-meson-g12a.txt deleted file mode 100644 index 3a96cbed9294..000000000000 --- a/Documentation/devicetree/bindings/net/mdio-mux-meson-g12a.txt +++ /dev/null @@ -1,48 +0,0 @@ -Properties for the MDIO bus multiplexer/glue of Amlogic G12a SoC family. - -This is a special case of a MDIO bus multiplexer. It allows to choose between -the internal mdio bus leading to the embedded 10/100 PHY or the external -MDIO bus. - -Required properties in addition to the generic multiplexer properties: -- compatible : amlogic,g12a-mdio-mux -- reg: physical address and length of the multiplexer/glue registers -- clocks: list of clock phandle, one for each entry clock-names. -- clock-names: should contain the following: - * "pclk" : peripheral clock. - * "clkin0" : platform crytal - * "clkin1" : SoC 50MHz MPLL - -Example : - -mdio_mux: mdio-multiplexer@4c000 { - compatible = "amlogic,g12a-mdio-mux"; - reg = <0x0 0x4c000 0x0 0xa4>; - clocks = <&clkc CLKID_ETH_PHY>, - <&xtal>, - <&clkc CLKID_MPLL_5OM>; - clock-names = "pclk", "clkin0", "clkin1"; - mdio-parent-bus = <&mdio0>; - #address-cells = <1>; - #size-cells = <0>; - - ext_mdio: mdio@0 { - reg = <0>; - #address-cells = <1>; - #size-cells = <0>; - }; - - int_mdio: mdio@1 { - reg = <1>; - #address-cells = <1>; - #size-cells = <0>; - - internal_ephy: ethernet-phy@8 { - compatible = "ethernet-phy-id0180.3301", - "ethernet-phy-ieee802.3-c22"; - interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>; - reg = <8>; - max-speed = <100>; - }; - }; -}; diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml index 821974815dec..900063411a20 100644 --- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml +++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml @@ -57,6 +57,7 @@ properties: - ti,am654-cpsw-nuss - ti,j7200-cpswxg-nuss - ti,j721e-cpsw-nuss + - ti,j721e-cpswxg-nuss - ti,am642-cpsw-nuss reg: @@ -111,7 +112,7 @@ properties: const: 0 patternProperties: - "^port@[1-4]$": + "^port@[1-8]$": type: object description: CPSWxG NUSS external ports @@ -121,7 +122,7 @@ properties: properties: reg: minimum: 1 - maximum: 4 + maximum: 8 description: CPSW port number phys: @@ -186,12 +187,36 @@ allOf: properties: compatible: contains: - const: ti,j7200-cpswxg-nuss + const: ti,j721e-cpswxg-nuss then: properties: ethernet-ports: patternProperties: - "^port@[3-4]$": false + "^port@[5-8]$": false + "^port@[1-4]$": + properties: + reg: + minimum: 1 + maximum: 4 + + - if: + not: + properties: + compatible: + contains: + enum: + - ti,j721e-cpswxg-nuss + - ti,j7200-cpswxg-nuss + then: + properties: + ethernet-ports: + patternProperties: + "^port@[3-8]$": false + "^port@[1-2]$": + properties: + reg: + minimum: 1 + maximum: 2 additionalProperties: false diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 70ffb3780621..161766b1de50 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -775,6 +775,8 @@ patternProperties: description: MaxBotix Inc. "^maxim,.*": description: Maxim Integrated Products + "^maxlinear,.*": + description: MaxLinear Inc. "^mbvl,.*": description: Mobiveil Inc. "^mcube,.*": diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index f10f8eb44255..c59b542eb693 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1716,6 +1716,141 @@ being used. Current supported options are toeplitz, xor or crc32. ETHTOOL_A_RSS_INDIR attribute returns RSS indrection table where each byte indicates queue number. +PLCA_GET_CFG +============ + +Gets the IEEE 802.3cg-2019 Clause 148 Physical Layer Collision Avoidance +(PLCA) Reconciliation Sublayer (RS) attributes. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_PLCA_HEADER`` nested request header + ===================================== ====== ========================== + +Kernel response contents: + + ====================================== ====== ============================= + ``ETHTOOL_A_PLCA_HEADER`` nested reply header + ``ETHTOOL_A_PLCA_VERSION`` u16 Supported PLCA management + interface standard/version + ``ETHTOOL_A_PLCA_ENABLED`` u8 PLCA Admin State + ``ETHTOOL_A_PLCA_NODE_ID`` u32 PLCA unique local node ID + ``ETHTOOL_A_PLCA_NODE_CNT`` u32 Number of PLCA nodes on the + network, including the + coordinator + ``ETHTOOL_A_PLCA_TO_TMR`` u32 Transmit Opportunity Timer + value in bit-times (BT) + ``ETHTOOL_A_PLCA_BURST_CNT`` u32 Number of additional packets + the node is allowed to send + within a single TO + ``ETHTOOL_A_PLCA_BURST_TMR`` u32 Time to wait for the MAC to + transmit a new frame before + terminating the burst + ====================================== ====== ============================= + +When set, the optional ``ETHTOOL_A_PLCA_VERSION`` attribute indicates which +standard and version the PLCA management interface complies to. When not set, +the interface is vendor-specific and (possibly) supplied by the driver. +The OPEN Alliance SIG specifies a standard register map for 10BASE-T1S PHYs +embedding the PLCA Reconcialiation Sublayer. See "10BASE-T1S PLCA Management +Registers" at https://www.opensig.org/about/specifications/. + +When set, the optional ``ETHTOOL_A_PLCA_ENABLED`` attribute indicates the +administrative state of the PLCA RS. When not set, the node operates in "plain" +CSMA/CD mode. This option is corresponding to ``IEEE 802.3cg-2019`` 30.16.1.1.1 +aPLCAAdminState / 30.16.1.2.1 acPLCAAdminControl. + +When set, the optional ``ETHTOOL_A_PLCA_NODE_ID`` attribute indicates the +configured local node ID of the PHY. This ID determines which transmit +opportunity (TO) is reserved for the node to transmit into. This option is +corresponding to ``IEEE 802.3cg-2019`` 30.16.1.1.4 aPLCALocalNodeID. The valid +range for this attribute is [0 .. 255] where 255 means "not configured". + +When set, the optional ``ETHTOOL_A_PLCA_NODE_CNT`` attribute indicates the +configured maximum number of PLCA nodes on the mixing-segment. This number +determines the total number of transmit opportunities generated during a +PLCA cycle. This attribute is relevant only for the PLCA coordinator, which is +the node with aPLCALocalNodeID set to 0. Follower nodes ignore this setting. +This option is corresponding to ``IEEE 802.3cg-2019`` 30.16.1.1.3 +aPLCANodeCount. The valid range for this attribute is [1 .. 255]. + +When set, the optional ``ETHTOOL_A_PLCA_TO_TMR`` attribute indicates the +configured value of the transmit opportunity timer in bit-times. This value +must be set equal across all nodes sharing the medium for PLCA to work +correctly. This option is corresponding to ``IEEE 802.3cg-2019`` 30.16.1.1.5 +aPLCATransmitOpportunityTimer. The valid range for this attribute is +[0 .. 255]. + +When set, the optional ``ETHTOOL_A_PLCA_BURST_CNT`` attribute indicates the +configured number of extra packets that the node is allowed to send during a +single transmit opportunity. By default, this attribute is 0, meaning that +the node can only send a sigle frame per TO. When greater than 0, the PLCA RS +keeps the TO after any transmission, waiting for the MAC to send a new frame +for up to aPLCABurstTimer BTs. This can only happen a number of times per PLCA +cycle up to the value of this parameter. After that, the burst is over and the +normal counting of TOs resumes. This option is corresponding to +``IEEE 802.3cg-2019`` 30.16.1.1.6 aPLCAMaxBurstCount. The valid range for this +attribute is [0 .. 255]. + +When set, the optional ``ETHTOOL_A_PLCA_BURST_TMR`` attribute indicates how +many bit-times the PLCA RS waits for the MAC to initiate a new transmission +when aPLCAMaxBurstCount is greater than 0. If the MAC fails to send a new +frame within this time, the burst ends and the counting of TOs resumes. +Otherwise, the new frame is sent as part of the current burst. This option +is corresponding to ``IEEE 802.3cg-2019`` 30.16.1.1.7 aPLCABurstTimer. The +valid range for this attribute is [0 .. 255]. Although, the value should be +set greater than the Inter-Frame-Gap (IFG) time of the MAC (plus some margin) +for PLCA burst mode to work as intended. + +PLCA_SET_CFG +============ + +Sets PLCA RS parameters. + +Request contents: + + ====================================== ====== ============================= + ``ETHTOOL_A_PLCA_HEADER`` nested request header + ``ETHTOOL_A_PLCA_ENABLED`` u8 PLCA Admin State + ``ETHTOOL_A_PLCA_NODE_ID`` u8 PLCA unique local node ID + ``ETHTOOL_A_PLCA_NODE_CNT`` u8 Number of PLCA nodes on the + netkork, including the + coordinator + ``ETHTOOL_A_PLCA_TO_TMR`` u8 Transmit Opportunity Timer + value in bit-times (BT) + ``ETHTOOL_A_PLCA_BURST_CNT`` u8 Number of additional packets + the node is allowed to send + within a single TO + ``ETHTOOL_A_PLCA_BURST_TMR`` u8 Time to wait for the MAC to + transmit a new frame before + terminating the burst + ====================================== ====== ============================= + +For a description of each attribute, see ``PLCA_GET_CFG``. + +PLCA_GET_STATUS +=============== + +Gets PLCA RS status information. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_PLCA_HEADER`` nested request header + ===================================== ====== ========================== + +Kernel response contents: + + ====================================== ====== ============================= + ``ETHTOOL_A_PLCA_HEADER`` nested reply header + ``ETHTOOL_A_PLCA_STATUS`` u8 PLCA RS operational status + ====================================== ====== ============================= + +When set, the ``ETHTOOL_A_PLCA_STATUS`` attribute indicates whether the node is +detecting the presence of the BEACON on the network. This flag is +corresponding to ``IEEE 802.3cg-2019`` 30.16.1.1.2 aPLCAStatus. + Request translation =================== @@ -1817,4 +1952,7 @@ are netlink only. n/a ``ETHTOOL_MSG_PHC_VCLOCKS_GET`` n/a ``ETHTOOL_MSG_MODULE_GET`` n/a ``ETHTOOL_MSG_MODULE_SET`` + n/a ``ETHTOOL_MSG_PLCA_GET_CFG`` + n/a ``ETHTOOL_MSG_PLCA_SET_CFG`` + n/a ``ETHTOOL_MSG_PLCA_GET_STATUS`` =================================== ===================================== diff --git a/MAINTAINERS b/MAINTAINERS index e278cd5d0de0..c1062b42e680 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6099,7 +6099,7 @@ S: Supported F: Documentation/networking/devlink F: include/net/devlink.h F: include/uapi/linux/devlink.h -F: net/core/devlink.c +F: net/devlink/ DH ELECTRONICS IMX6 DHCOM/DHCOR BOARD SUPPORT M: Christoph Niedermaier <cniedermaier@dh-electronics.com> @@ -15584,6 +15584,13 @@ L: linux-mips@vger.kernel.org S: Maintained F: arch/mips/boot/dts/ralink/omega2p.dts +ONSEMI ETHERNET PHY DRIVERS +M: Piergiorgio Beruto <piergiorgio.beruto@gmail.com> +L: netdev@vger.kernel.org +S: Supported +W: http://www.onsemi.com +F: drivers/net/phy/ncn* + OP-TEE DRIVER M: Jens Wiklander <jens.wiklander@linaro.org> L: op-tee@lists.trustedfirmware.org @@ -16623,6 +16630,13 @@ S: Maintained F: Documentation/devicetree/bindings/iio/chemical/plantower,pms7003.yaml F: drivers/iio/chemical/pms7003.c +PLCA RECONCILIATION SUBLAYER (IEEE802.3 Clause 148) +M: Piergiorgio Beruto <piergiorgio.beruto@gmail.com> +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/phy/mdio-open-alliance.h +F: net/ethtool/plca.c + PLDMFW LIBRARY M: Jacob Keller <jacob.e.keller@intel.com> S: Maintained diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b808be77635e..8db6077febdd 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1003,6 +1003,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image u8 b2 = 0, b3 = 0; u8 *start_of_ldx; s64 jmp_offset; + s16 insn_off; u8 jmp_cond; u8 *func; int nops; @@ -1369,57 +1370,52 @@ st: if (is_imm8(insn->off)) case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_DW: case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + insn_off = insn->off; + if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { - /* Though the verifier prevents negative insn->off in BPF_PROBE_MEM - * add abs(insn->off) to the limit to make sure that negative - * offset won't be an issue. - * insn->off is s16, so it won't affect valid pointers. + /* Conservatively check that src_reg + insn->off is a kernel address: + * src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE + * src_reg is used as scratch for src_reg += insn->off and restored + * after emit_ldx if necessary */ - u64 limit = TASK_SIZE_MAX + PAGE_SIZE + abs(insn->off); - u8 *end_of_jmp1, *end_of_jmp2; - /* Conservatively check that src_reg + insn->off is a kernel address: - * 1. src_reg + insn->off >= limit - * 2. src_reg + insn->off doesn't become small positive. - * Cannot do src_reg + insn->off >= limit in one branch, - * since it needs two spare registers, but JIT has only one. + u64 limit = TASK_SIZE_MAX + PAGE_SIZE; + u8 *end_of_jmp; + + /* At end of these emitted checks, insn->off will have been added + * to src_reg, so no need to do relative load with insn->off offset */ + insn_off = 0; /* movabsq r11, limit */ EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG)); EMIT((u32)limit, 4); EMIT(limit >> 32, 4); + + if (insn->off) { + /* add src_reg, insn->off */ + maybe_emit_1mod(&prog, src_reg, true); + EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off); + } + /* cmp src_reg, r11 */ maybe_emit_mod(&prog, src_reg, AUX_REG, true); EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG)); - /* if unsigned '<' goto end_of_jmp2 */ - EMIT2(X86_JB, 0); - end_of_jmp1 = prog; - - /* mov r11, src_reg */ - emit_mov_reg(&prog, true, AUX_REG, src_reg); - /* add r11, insn->off */ - maybe_emit_1mod(&prog, AUX_REG, true); - EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off); - /* jmp if not carry to start_of_ldx - * Otherwise ERR_PTR(-EINVAL) + 128 will be the user addr - * that has to be rejected. - */ - EMIT2(0x73 /* JNC */, 0); - end_of_jmp2 = prog; + + /* if unsigned '>=', goto load */ + EMIT2(X86_JAE, 0); + end_of_jmp = prog; /* xor dst_reg, dst_reg */ emit_mov_imm32(&prog, false, dst_reg, 0); /* jmp byte_after_ldx */ EMIT2(0xEB, 0); - /* populate jmp_offset for JB above to jump to xor dst_reg */ - end_of_jmp1[-1] = end_of_jmp2 - end_of_jmp1; - /* populate jmp_offset for JNC above to jump to start_of_ldx */ + /* populate jmp_offset for JAE above to jump to start_of_ldx */ start_of_ldx = prog; - end_of_jmp2[-1] = start_of_ldx - end_of_jmp2; + end_of_jmp[-1] = start_of_ldx - end_of_jmp; } - emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); + emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off); if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { struct exception_table_entry *ex; u8 *_insn = image + proglen + (start_of_ldx - temp); @@ -1428,6 +1424,18 @@ st: if (is_imm8(insn->off)) /* populate jmp_offset for JMP above */ start_of_ldx[-1] = prog - start_of_ldx; + if (insn->off && src_reg != dst_reg) { + /* sub src_reg, insn->off + * Restore src_reg after "add src_reg, insn->off" in prev + * if statement. But if src_reg == dst_reg, emit_ldx + * above already clobbered src_reg, so no need to restore. + * If add src_reg, insn->off was unnecessary, no need to + * restore either. + */ + maybe_emit_1mod(&prog, src_reg, true); + EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off); + } + if (!bpf_prog->aux->extable) break; diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile index 49bf358b9c4f..1409e691ab77 100644 --- a/drivers/net/dsa/mv88e6xxx/Makefile +++ b/drivers/net/dsa/mv88e6xxx/Makefile @@ -15,6 +15,7 @@ mv88e6xxx-objs += port_hidden.o mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += ptp.o mv88e6xxx-objs += serdes.o mv88e6xxx-objs += smi.o +mv88e6xxx-objs += switchdev.o mv88e6xxx-objs += trace.o # for tracing framework to find trace.h diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 242b8b325504..1168ea75f5f5 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1728,11 +1728,11 @@ static int mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid, return err; } -static int mv88e6xxx_vtu_walk(struct mv88e6xxx_chip *chip, - int (*cb)(struct mv88e6xxx_chip *chip, - const struct mv88e6xxx_vtu_entry *entry, - void *priv), - void *priv) +int mv88e6xxx_vtu_walk(struct mv88e6xxx_chip *chip, + int (*cb)(struct mv88e6xxx_chip *chip, + const struct mv88e6xxx_vtu_entry *entry, + void *priv), + void *priv) { struct mv88e6xxx_vtu_entry entry = { .vid = mv88e6xxx_max_vid(chip), @@ -3884,6 +3884,24 @@ static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg) return err ? err : val; } +static int mv88e6xxx_mdio_read_c45(struct mii_bus *bus, int phy, int devad, + int reg) +{ + struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv; + struct mv88e6xxx_chip *chip = mdio_bus->chip; + u16 val; + int err; + + if (!chip->info->ops->phy_read_c45) + return -EOPNOTSUPP; + + mv88e6xxx_reg_lock(chip); + err = chip->info->ops->phy_read_c45(chip, bus, phy, devad, reg, &val); + mv88e6xxx_reg_unlock(chip); + + return err ? err : val; +} + static int mv88e6xxx_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val) { struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv; @@ -3900,6 +3918,23 @@ static int mv88e6xxx_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val) return err; } +static int mv88e6xxx_mdio_write_c45(struct mii_bus *bus, int phy, int devad, + int reg, u16 val) +{ + struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv; + struct mv88e6xxx_chip *chip = mdio_bus->chip; + int err; + + if (!chip->info->ops->phy_write_c45) + return -EOPNOTSUPP; + + mv88e6xxx_reg_lock(chip); + err = chip->info->ops->phy_write_c45(chip, bus, phy, devad, reg, val); + mv88e6xxx_reg_unlock(chip); + + return err; +} + static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, struct device_node *np, bool external) @@ -3938,6 +3973,8 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, bus->read = mv88e6xxx_mdio_read; bus->write = mv88e6xxx_mdio_write; + bus->read_c45 = mv88e6xxx_mdio_read_c45; + bus->write_c45 = mv88e6xxx_mdio_write_c45; bus->parent = chip->dev; if (!external) { @@ -4149,8 +4186,10 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .ip_pri_map = mv88e6085_g1_ip_pri_map, .irl_init_all = mv88e6352_g2_irl_init_all, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6185_port_sync_link, .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, @@ -4198,8 +4237,10 @@ static const struct mv88e6xxx_ops mv88e6123_ops = { .ip_pri_map = mv88e6085_g1_ip_pri_map, .irl_init_all = mv88e6352_g2_irl_init_all, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, @@ -4279,8 +4320,10 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -4343,8 +4386,10 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { .ip_pri_map = mv88e6085_g1_ip_pri_map, .irl_init_all = mv88e6352_g2_irl_init_all, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_speed_duplex = mv88e6185_port_set_speed_duplex, @@ -4426,8 +4471,10 @@ static const struct mv88e6xxx_ops mv88e6171_ops = { .ip_pri_map = mv88e6085_g1_ip_pri_map, .irl_init_all = mv88e6352_g2_irl_init_all, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -4472,8 +4519,10 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom16, .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -4527,8 +4576,10 @@ static const struct mv88e6xxx_ops mv88e6175_ops = { .ip_pri_map = mv88e6085_g1_ip_pri_map, .irl_init_all = mv88e6352_g2_irl_init_all, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -4573,8 +4624,10 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom16, .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -4673,8 +4726,10 @@ static const struct mv88e6xxx_ops mv88e6190_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -4736,8 +4791,10 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -4799,8 +4856,10 @@ static const struct mv88e6xxx_ops mv88e6191_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -4862,8 +4921,10 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom16, .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -4925,8 +4986,10 @@ static const struct mv88e6xxx_ops mv88e6250_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom16, .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -4964,8 +5027,10 @@ static const struct mv88e6xxx_ops mv88e6290_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -5029,8 +5094,10 @@ static const struct mv88e6xxx_ops mv88e6320_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom16, .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6320_port_set_rgmii_delay, @@ -5074,8 +5141,10 @@ static const struct mv88e6xxx_ops mv88e6321_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom16, .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6320_port_set_rgmii_delay, @@ -5117,8 +5186,10 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -5183,8 +5254,10 @@ static const struct mv88e6xxx_ops mv88e6350_ops = { .ip_pri_map = mv88e6085_g1_ip_pri_map, .irl_init_all = mv88e6352_g2_irl_init_all, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -5227,8 +5300,10 @@ static const struct mv88e6xxx_ops mv88e6351_ops = { .ip_pri_map = mv88e6085_g1_ip_pri_map, .irl_init_all = mv88e6352_g2_irl_init_all, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -5275,8 +5350,10 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom16, .set_eeprom = mv88e6xxx_g2_set_eeprom16, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay, @@ -5340,8 +5417,10 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -5407,8 +5486,10 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -5473,8 +5554,10 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = { .get_eeprom = mv88e6xxx_g2_get_eeprom8, .set_eeprom = mv88e6xxx_g2_set_eeprom8, .set_switch_mac = mv88e6xxx_g2_set_switch_mac, - .phy_read = mv88e6xxx_g2_smi_phy_read, - .phy_write = mv88e6xxx_g2_smi_phy_write, + .phy_read = mv88e6xxx_g2_smi_phy_read_c22, + .phy_write = mv88e6xxx_g2_smi_phy_write_c22, + .phy_read_c45 = mv88e6xxx_g2_smi_phy_read_c45, + .phy_write_c45 = mv88e6xxx_g2_smi_phy_write_c45, .port_set_link = mv88e6xxx_port_set_link, .port_sync_link = mv88e6xxx_port_sync_link, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, @@ -6526,7 +6609,7 @@ static int mv88e6xxx_port_pre_bridge_flags(struct dsa_switch *ds, int port, const struct mv88e6xxx_ops *ops; if (flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | - BR_BCAST_FLOOD | BR_PORT_LOCKED)) + BR_BCAST_FLOOD | BR_PORT_LOCKED | BR_PORT_MAB)) return -EINVAL; ops = chip->info->ops; @@ -6545,7 +6628,7 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port, struct netlink_ext_ack *extack) { struct mv88e6xxx_chip *chip = ds->priv; - int err = -EOPNOTSUPP; + int err = 0; mv88e6xxx_reg_lock(chip); @@ -6584,6 +6667,12 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port, goto out; } + if (flags.mask & BR_PORT_MAB) { + bool mab = !!(flags.val & BR_PORT_MAB); + + mv88e6xxx_port_set_mab(chip, port, mab); + } + if (flags.mask & BR_PORT_LOCKED) { bool locked = !!(flags.val & BR_PORT_LOCKED); diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index e693154cf803..945a6696ad72 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -280,6 +280,9 @@ struct mv88e6xxx_port { unsigned int serdes_irq; char serdes_irq_name[64]; struct devlink_region *region; + + /* MacAuth Bypass control flag */ + bool mab; }; enum mv88e6xxx_region_id { @@ -451,6 +454,13 @@ struct mv88e6xxx_ops { struct mii_bus *bus, int addr, int reg, u16 val); + int (*phy_read_c45)(struct mv88e6xxx_chip *chip, + struct mii_bus *bus, + int addr, int devad, int reg, u16 *val); + int (*phy_write_c45)(struct mv88e6xxx_chip *chip, + struct mii_bus *bus, + int addr, int devad, int reg, u16 val); + /* Priority Override Table operations */ int (*pot_clear)(struct mv88e6xxx_chip *chip); @@ -784,6 +794,12 @@ static inline bool mv88e6xxx_is_invalid_port(struct mv88e6xxx_chip *chip, int po return (chip->info->invalid_port_mask & BIT(port)) != 0; } +static inline void mv88e6xxx_port_set_mab(struct mv88e6xxx_chip *chip, + int port, bool mab) +{ + chip->ports[port].mab = mab; +} + int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg, @@ -802,6 +818,12 @@ static inline void mv88e6xxx_reg_unlock(struct mv88e6xxx_chip *chip) mutex_unlock(&chip->reg_lock); } +int mv88e6xxx_vtu_walk(struct mv88e6xxx_chip *chip, + int (*cb)(struct mv88e6xxx_chip *chip, + const struct mv88e6xxx_vtu_entry *entry, + void *priv), + void *priv); + int mv88e6xxx_fid_map(struct mv88e6xxx_chip *chip, unsigned long *bitmap); #endif /* _MV88E6XXX_CHIP_H */ diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c index 61ae2d61e25c..ce3b3690c3c0 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_atu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c @@ -12,6 +12,7 @@ #include "chip.h" #include "global1.h" +#include "switchdev.h" #include "trace.h" /* Offset 0x01: ATU FID Register */ @@ -409,23 +410,25 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id) err = mv88e6xxx_g1_read_atu_violation(chip); if (err) - goto out; + goto out_unlock; err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_ATU_OP, &val); if (err) - goto out; + goto out_unlock; err = mv88e6xxx_g1_atu_fid_read(chip, &fid); if (err) - goto out; + goto out_unlock; err = mv88e6xxx_g1_atu_data_read(chip, &entry); if (err) - goto out; + goto out_unlock; err = mv88e6xxx_g1_atu_mac_read(chip, &entry); if (err) - goto out; + goto out_unlock; + + mv88e6xxx_reg_unlock(chip); spid = entry.state; @@ -441,6 +444,13 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id) entry.portvec, entry.mac, fid); chip->ports[spid].atu_miss_violation++; + + if (fid != MV88E6XXX_FID_STANDALONE && chip->ports[spid].mab) { + err = mv88e6xxx_handle_miss_violation(chip, spid, + &entry, fid); + if (err) + goto out; + } } if (val & MV88E6XXX_G1_ATU_OP_FULL_VIOLATION) { @@ -449,13 +459,13 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id) fid); chip->ports[spid].atu_full_violation++; } - mv88e6xxx_reg_unlock(chip); return IRQ_HANDLED; -out: +out_unlock: mv88e6xxx_reg_unlock(chip); +out: dev_err(chip->dev, "ATU problem: error %d while handling interrupt\n", err); return IRQ_HANDLED; diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index fa65ecd9cb85..ed3b2f88e783 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -739,20 +739,18 @@ static int mv88e6xxx_g2_smi_phy_read_data_c45(struct mv88e6xxx_chip *chip, return mv88e6xxx_g2_read(chip, MV88E6XXX_G2_SMI_PHY_DATA, data); } -static int mv88e6xxx_g2_smi_phy_read_c45(struct mv88e6xxx_chip *chip, - bool external, int port, int reg, - u16 *data) +static int _mv88e6xxx_g2_smi_phy_read_c45(struct mv88e6xxx_chip *chip, + bool external, int port, int devad, + int reg, u16 *data) { - int dev = (reg >> 16) & 0x1f; - int addr = reg & 0xffff; int err; - err = mv88e6xxx_g2_smi_phy_write_addr_c45(chip, external, port, dev, - addr); + err = mv88e6xxx_g2_smi_phy_write_addr_c45(chip, external, port, devad, + reg); if (err) return err; - return mv88e6xxx_g2_smi_phy_read_data_c45(chip, external, port, dev, + return mv88e6xxx_g2_smi_phy_read_data_c45(chip, external, port, devad, data); } @@ -771,51 +769,65 @@ static int mv88e6xxx_g2_smi_phy_write_data_c45(struct mv88e6xxx_chip *chip, return mv88e6xxx_g2_smi_phy_access_c45(chip, external, op, port, dev); } -static int mv88e6xxx_g2_smi_phy_write_c45(struct mv88e6xxx_chip *chip, - bool external, int port, int reg, - u16 data) +static int _mv88e6xxx_g2_smi_phy_write_c45(struct mv88e6xxx_chip *chip, + bool external, int port, int devad, + int reg, u16 data) { - int dev = (reg >> 16) & 0x1f; - int addr = reg & 0xffff; int err; - err = mv88e6xxx_g2_smi_phy_write_addr_c45(chip, external, port, dev, - addr); + err = mv88e6xxx_g2_smi_phy_write_addr_c45(chip, external, port, devad, + reg); if (err) return err; - return mv88e6xxx_g2_smi_phy_write_data_c45(chip, external, port, dev, + return mv88e6xxx_g2_smi_phy_write_data_c45(chip, external, port, devad, data); } -int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, struct mii_bus *bus, - int addr, int reg, u16 *val) +int mv88e6xxx_g2_smi_phy_read_c22(struct mv88e6xxx_chip *chip, + struct mii_bus *bus, + int addr, int reg, u16 *val) { struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv; bool external = mdio_bus->external; - if (reg & MII_ADDR_C45) - return mv88e6xxx_g2_smi_phy_read_c45(chip, external, addr, reg, - val); - return mv88e6xxx_g2_smi_phy_read_data_c22(chip, external, addr, reg, val); } -int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, struct mii_bus *bus, - int addr, int reg, u16 val) +int mv88e6xxx_g2_smi_phy_read_c45(struct mv88e6xxx_chip *chip, + struct mii_bus *bus, int addr, int devad, + int reg, u16 *val) { struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv; bool external = mdio_bus->external; - if (reg & MII_ADDR_C45) - return mv88e6xxx_g2_smi_phy_write_c45(chip, external, addr, reg, - val); + return _mv88e6xxx_g2_smi_phy_read_c45(chip, external, addr, devad, reg, + val); +} + +int mv88e6xxx_g2_smi_phy_write_c22(struct mv88e6xxx_chip *chip, + struct mii_bus *bus, int addr, int reg, + u16 val) +{ + struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv; + bool external = mdio_bus->external; return mv88e6xxx_g2_smi_phy_write_data_c22(chip, external, addr, reg, val); } +int mv88e6xxx_g2_smi_phy_write_c45(struct mv88e6xxx_chip *chip, + struct mii_bus *bus, int addr, int devad, + int reg, u16 val) +{ + struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv; + bool external = mdio_bus->external; + + return _mv88e6xxx_g2_smi_phy_write_c45(chip, external, addr, devad, reg, + val); +} + /* Offset 0x1B: Watchdog Control */ static int mv88e6097_watchdog_action(struct mv88e6xxx_chip *chip, int irq) { diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index 7536b8b0ad01..e973114d6890 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -314,12 +314,18 @@ int mv88e6xxx_g2_wait_bit(struct mv88e6xxx_chip *chip, int reg, int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port); int mv88e6390_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port); -int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, - struct mii_bus *bus, - int addr, int reg, u16 *val); -int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, - struct mii_bus *bus, - int addr, int reg, u16 val); +int mv88e6xxx_g2_smi_phy_read_c22(struct mv88e6xxx_chip *chip, + struct mii_bus *bus, + int addr, int reg, u16 *val); +int mv88e6xxx_g2_smi_phy_write_c22(struct mv88e6xxx_chip *chip, + struct mii_bus *bus, + int addr, int reg, u16 val); +int mv88e6xxx_g2_smi_phy_read_c45(struct mv88e6xxx_chip *chip, + struct mii_bus *bus, + int addr, int devad, int reg, u16 *val); +int mv88e6xxx_g2_smi_phy_write_c45(struct mv88e6xxx_chip *chip, + struct mii_bus *bus, + int addr, int devad, int reg, u16 val); int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr); int mv88e6xxx_g2_get_eeprom8(struct mv88e6xxx_chip *chip, diff --git a/drivers/net/dsa/mv88e6xxx/phy.c b/drivers/net/dsa/mv88e6xxx/phy.c index 252b5b3a3efe..8bb88b3d900d 100644 --- a/drivers/net/dsa/mv88e6xxx/phy.c +++ b/drivers/net/dsa/mv88e6xxx/phy.c @@ -55,6 +55,38 @@ int mv88e6xxx_phy_write(struct mv88e6xxx_chip *chip, int phy, int reg, u16 val) return chip->info->ops->phy_write(chip, bus, addr, reg, val); } +int mv88e6xxx_phy_read_c45(struct mv88e6xxx_chip *chip, int phy, int devad, + int reg, u16 *val) +{ + int addr = phy; /* PHY devices addresses start at 0x0 */ + struct mii_bus *bus; + + bus = mv88e6xxx_default_mdio_bus(chip); + if (!bus) + return -EOPNOTSUPP; + + if (!chip->info->ops->phy_read_c45) + return -EOPNOTSUPP; + + return chip->info->ops->phy_read_c45(chip, bus, addr, devad, reg, val); +} + +int mv88e6xxx_phy_write_c45(struct mv88e6xxx_chip *chip, int phy, int devad, + int reg, u16 val) +{ + int addr = phy; /* PHY devices addresses start at 0x0 */ + struct mii_bus *bus; + + bus = mv88e6xxx_default_mdio_bus(chip); + if (!bus) + return -EOPNOTSUPP; + + if (!chip->info->ops->phy_write_c45) + return -EOPNOTSUPP; + + return chip->info->ops->phy_write_c45(chip, bus, addr, devad, reg, val); +} + static int mv88e6xxx_phy_page_get(struct mv88e6xxx_chip *chip, int phy, u8 page) { return mv88e6xxx_phy_write(chip, phy, MV88E6XXX_PHY_PAGE, page); diff --git a/drivers/net/dsa/mv88e6xxx/phy.h b/drivers/net/dsa/mv88e6xxx/phy.h index 05ea0d546969..5f47722364cc 100644 --- a/drivers/net/dsa/mv88e6xxx/phy.h +++ b/drivers/net/dsa/mv88e6xxx/phy.h @@ -28,6 +28,10 @@ int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy, int reg, u16 *val); int mv88e6xxx_phy_write(struct mv88e6xxx_chip *chip, int phy, int reg, u16 val); +int mv88e6xxx_phy_read_c45(struct mv88e6xxx_chip *chip, int phy, int devad, + int reg, u16 *val); +int mv88e6xxx_phy_write_c45(struct mv88e6xxx_chip *chip, int phy, int devad, + int reg, u16 val); int mv88e6xxx_phy_page_read(struct mv88e6xxx_chip *chip, int phy, u8 page, int reg, u16 *val); int mv88e6xxx_phy_page_write(struct mv88e6xxx_chip *chip, int phy, diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index d94150d8f3f4..72faec8f44dc 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -36,17 +36,13 @@ static int mv88e6352_serdes_write(struct mv88e6xxx_chip *chip, int reg, static int mv88e6390_serdes_read(struct mv88e6xxx_chip *chip, int lane, int device, int reg, u16 *val) { - int reg_c45 = MII_ADDR_C45 | device << 16 | reg; - - return mv88e6xxx_phy_read(chip, lane, reg_c45, val); + return mv88e6xxx_phy_read_c45(chip, lane, device, reg, val); } static int mv88e6390_serdes_write(struct mv88e6xxx_chip *chip, int lane, int device, int reg, u16 val) { - int reg_c45 = MII_ADDR_C45 | device << 16 | reg; - - return mv88e6xxx_phy_write(chip, lane, reg_c45, val); + return mv88e6xxx_phy_write_c45(chip, lane, device, reg, val); } static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, diff --git a/drivers/net/dsa/mv88e6xxx/switchdev.c b/drivers/net/dsa/mv88e6xxx/switchdev.c new file mode 100644 index 000000000000..4c346a884fb2 --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/switchdev.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * switchdev.c + * + * Authors: + * Hans J. Schultz <netdev@kapio-technology.com> + * + */ + +#include <net/switchdev.h> +#include "chip.h" +#include "global1.h" +#include "switchdev.h" + +struct mv88e6xxx_fid_search_ctx { + u16 fid_search; + u16 vid_found; +}; + +static int __mv88e6xxx_find_vid(struct mv88e6xxx_chip *chip, + const struct mv88e6xxx_vtu_entry *entry, + void *priv) +{ + struct mv88e6xxx_fid_search_ctx *ctx = priv; + + if (ctx->fid_search == entry->fid) { + ctx->vid_found = entry->vid; + return 1; + } + + return 0; +} + +static int mv88e6xxx_find_vid(struct mv88e6xxx_chip *chip, u16 fid, u16 *vid) +{ + struct mv88e6xxx_fid_search_ctx ctx; + int err; + + ctx.fid_search = fid; + mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_vtu_walk(chip, __mv88e6xxx_find_vid, &ctx); + mv88e6xxx_reg_unlock(chip); + if (err < 0) + return err; + if (err == 1) + *vid = ctx.vid_found; + else + return -ENOENT; + + return 0; +} + +int mv88e6xxx_handle_miss_violation(struct mv88e6xxx_chip *chip, int port, + struct mv88e6xxx_atu_entry *entry, u16 fid) +{ + struct switchdev_notifier_fdb_info info = { + .addr = entry->mac, + .locked = true, + }; + struct net_device *brport; + struct dsa_port *dp; + u16 vid; + int err; + + err = mv88e6xxx_find_vid(chip, fid, &vid); + if (err) + return err; + + info.vid = vid; + dp = dsa_to_port(chip->ds, port); + + rtnl_lock(); + brport = dsa_port_to_bridge_port(dp); + if (!brport) { + rtnl_unlock(); + return -ENODEV; + } + err = call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE, + brport, &info.info, NULL); + rtnl_unlock(); + + return err; +} diff --git a/drivers/net/dsa/mv88e6xxx/switchdev.h b/drivers/net/dsa/mv88e6xxx/switchdev.h new file mode 100644 index 000000000000..62214f9d62b0 --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/switchdev.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * + * switchdev.h + * + * Authors: + * Hans J. Schultz <netdev@kapio-technology.com> + * + */ + +#ifndef _MV88E6XXX_SWITCHDEV_H_ +#define _MV88E6XXX_SWITCHDEV_H_ + +#include "chip.h" + +int mv88e6xxx_handle_miss_violation(struct mv88e6xxx_chip *chip, int port, + struct mv88e6xxx_atu_entry *entry, + u16 fid); + +#endif /* _MV88E6XXX_SWITCHDEV_H_ */ diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 3936543a74d8..e033d6c819f3 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -807,6 +807,9 @@ static int xgbe_set_speed(struct xgbe_prv_data *pdata, int speed) unsigned int ss; switch (speed) { + case SPEED_10: + ss = 0x07; + break; case SPEED_1000: ss = 0x03; break; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 0c5c1b155683..cde0f86791b6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -274,6 +274,15 @@ static void xgbe_sgmii_1000_mode(struct xgbe_prv_data *pdata) pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_SGMII_1000); } +static void xgbe_sgmii_10_mode(struct xgbe_prv_data *pdata) +{ + /* Set MAC to 10M speed */ + pdata->hw_if.set_speed(pdata, SPEED_10); + + /* Call PHY implementation support to complete rate change */ + pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_SGMII_10); +} + static void xgbe_sgmii_100_mode(struct xgbe_prv_data *pdata) { /* Set MAC to 1G speed */ @@ -306,6 +315,9 @@ static void xgbe_change_mode(struct xgbe_prv_data *pdata, case XGBE_MODE_KR: xgbe_kr_mode(pdata); break; + case XGBE_MODE_SGMII_10: + xgbe_sgmii_10_mode(pdata); + break; case XGBE_MODE_SGMII_100: xgbe_sgmii_100_mode(pdata); break; @@ -1074,6 +1086,8 @@ static const char *xgbe_phy_fc_string(struct xgbe_prv_data *pdata) static const char *xgbe_phy_speed_string(int speed) { switch (speed) { + case SPEED_10: + return "10Mbps"; case SPEED_100: return "100Mbps"; case SPEED_1000: @@ -1161,6 +1175,7 @@ static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata) case XGBE_MODE_KX_1000: case XGBE_MODE_KX_2500: case XGBE_MODE_KR: + case XGBE_MODE_SGMII_10: case XGBE_MODE_SGMII_100: case XGBE_MODE_SGMII_1000: case XGBE_MODE_X: @@ -1222,6 +1237,8 @@ static int __xgbe_phy_config_aneg(struct xgbe_prv_data *pdata, bool set_mode) xgbe_set_mode(pdata, XGBE_MODE_SGMII_1000); } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_100)) { xgbe_set_mode(pdata, XGBE_MODE_SGMII_100); + } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_10)) { + xgbe_set_mode(pdata, XGBE_MODE_SGMII_10); } else { enable_irq(pdata->an_irq); ret = -EINVAL; @@ -1301,6 +1318,9 @@ static void xgbe_phy_status_result(struct xgbe_prv_data *pdata) mode = xgbe_phy_status_aneg(pdata); switch (mode) { + case XGBE_MODE_SGMII_10: + pdata->phy.speed = SPEED_10; + break; case XGBE_MODE_SGMII_100: pdata->phy.speed = SPEED_100; break; @@ -1443,6 +1463,8 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata) xgbe_sgmii_1000_mode(pdata); } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_100)) { xgbe_sgmii_100_mode(pdata); + } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_10)) { + xgbe_sgmii_10_mode(pdata); } else { ret = -EINVAL; goto err_irq; @@ -1540,6 +1562,8 @@ static int xgbe_phy_best_advertised_speed(struct xgbe_prv_data *pdata) return SPEED_1000; else if (XGBE_ADV(lks, 100baseT_Full)) return SPEED_100; + else if (XGBE_ADV(lks, 10baseT_Full)) + return SPEED_10; return SPEED_UNKNOWN; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index c731a04731f8..de7118cb10b8 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -124,6 +124,7 @@ #include "xgbe.h" #include "xgbe-common.h" +#define XGBE_PHY_PORT_SPEED_10 BIT(0) #define XGBE_PHY_PORT_SPEED_100 BIT(1) #define XGBE_PHY_PORT_SPEED_1000 BIT(2) #define XGBE_PHY_PORT_SPEED_2500 BIT(3) @@ -759,6 +760,8 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata) XGBE_SET_SUP(lks, Pause); XGBE_SET_SUP(lks, Asym_Pause); if (phy_data->sfp_base == XGBE_SFP_BASE_1000_T) { + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10) + XGBE_SET_SUP(lks, 10baseT_Full); if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) XGBE_SET_SUP(lks, 100baseT_Full); if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) @@ -1542,6 +1545,16 @@ static enum xgbe_mode xgbe_phy_an37_sgmii_outcome(struct xgbe_prv_data *pdata) xgbe_phy_phydev_flowctrl(pdata); switch (pdata->an_status & XGBE_SGMII_AN_LINK_SPEED) { + case XGBE_SGMII_AN_LINK_SPEED_10: + if (pdata->an_status & XGBE_SGMII_AN_LINK_DUPLEX) { + XGBE_SET_LP_ADV(lks, 10baseT_Full); + mode = XGBE_MODE_SGMII_10; + } else { + /* Half-duplex not supported */ + XGBE_SET_LP_ADV(lks, 10baseT_Half); + mode = XGBE_MODE_UNKNOWN; + } + break; case XGBE_SGMII_AN_LINK_SPEED_100: if (pdata->an_status & XGBE_SGMII_AN_LINK_DUPLEX) { XGBE_SET_LP_ADV(lks, 100baseT_Full); @@ -1658,7 +1671,10 @@ static enum xgbe_mode xgbe_phy_an73_redrv_outcome(struct xgbe_prv_data *pdata) switch (phy_data->sfp_base) { case XGBE_SFP_BASE_1000_T: if (phy_data->phydev && - (phy_data->phydev->speed == SPEED_100)) + (phy_data->phydev->speed == SPEED_10)) + mode = XGBE_MODE_SGMII_10; + else if (phy_data->phydev && + (phy_data->phydev->speed == SPEED_100)) mode = XGBE_MODE_SGMII_100; else mode = XGBE_MODE_SGMII_1000; @@ -1673,7 +1689,10 @@ static enum xgbe_mode xgbe_phy_an73_redrv_outcome(struct xgbe_prv_data *pdata) break; default: if (phy_data->phydev && - (phy_data->phydev->speed == SPEED_100)) + (phy_data->phydev->speed == SPEED_10)) + mode = XGBE_MODE_SGMII_10; + else if (phy_data->phydev && + (phy_data->phydev->speed == SPEED_100)) mode = XGBE_MODE_SGMII_100; else mode = XGBE_MODE_SGMII_1000; @@ -2127,6 +2146,20 @@ static void xgbe_phy_sgmii_100_mode(struct xgbe_prv_data *pdata) netif_dbg(pdata, link, pdata->netdev, "100MbE SGMII mode set\n"); } +static void xgbe_phy_sgmii_10_mode(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + xgbe_phy_set_redrv_mode(pdata); + + /* 10M/SGMII */ + xgbe_phy_perform_ratechange(pdata, XGBE_MB_CMD_SET_1G, XGBE_MB_SUBCMD_10MBITS); + + phy_data->cur_mode = XGBE_MODE_SGMII_10; + + netif_dbg(pdata, link, pdata->netdev, "10MbE SGMII mode set\n"); +} + static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; @@ -2185,6 +2218,7 @@ static enum xgbe_mode xgbe_phy_switch_baset_mode(struct xgbe_prv_data *pdata) return xgbe_phy_cur_mode(pdata); switch (xgbe_phy_cur_mode(pdata)) { + case XGBE_MODE_SGMII_10: case XGBE_MODE_SGMII_100: case XGBE_MODE_SGMII_1000: return XGBE_MODE_KR; @@ -2252,6 +2286,8 @@ static enum xgbe_mode xgbe_phy_get_baset_mode(struct xgbe_phy_data *phy_data, int speed) { switch (speed) { + case SPEED_10: + return XGBE_MODE_SGMII_10; case SPEED_100: return XGBE_MODE_SGMII_100; case SPEED_1000: @@ -2269,6 +2305,8 @@ static enum xgbe_mode xgbe_phy_get_sfp_mode(struct xgbe_phy_data *phy_data, int speed) { switch (speed) { + case SPEED_10: + return XGBE_MODE_SGMII_10; case SPEED_100: return XGBE_MODE_SGMII_100; case SPEED_1000: @@ -2343,6 +2381,9 @@ static void xgbe_phy_set_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode) case XGBE_MODE_KR: xgbe_phy_kr_mode(pdata); break; + case XGBE_MODE_SGMII_10: + xgbe_phy_sgmii_10_mode(pdata); + break; case XGBE_MODE_SGMII_100: xgbe_phy_sgmii_100_mode(pdata); break; @@ -2399,6 +2440,9 @@ static bool xgbe_phy_use_baset_mode(struct xgbe_prv_data *pdata, struct ethtool_link_ksettings *lks = &pdata->phy.lks; switch (mode) { + case XGBE_MODE_SGMII_10: + return xgbe_phy_check_mode(pdata, mode, + XGBE_ADV(lks, 10baseT_Full)); case XGBE_MODE_SGMII_100: return xgbe_phy_check_mode(pdata, mode, XGBE_ADV(lks, 100baseT_Full)); @@ -2428,6 +2472,11 @@ static bool xgbe_phy_use_sfp_mode(struct xgbe_prv_data *pdata, return false; return xgbe_phy_check_mode(pdata, mode, XGBE_ADV(lks, 1000baseX_Full)); + case XGBE_MODE_SGMII_10: + if (phy_data->sfp_base != XGBE_SFP_BASE_1000_T) + return false; + return xgbe_phy_check_mode(pdata, mode, + XGBE_ADV(lks, 10baseT_Full)); case XGBE_MODE_SGMII_100: if (phy_data->sfp_base != XGBE_SFP_BASE_1000_T) return false; @@ -2520,10 +2569,17 @@ static bool xgbe_phy_valid_speed_basex_mode(struct xgbe_phy_data *phy_data, } } -static bool xgbe_phy_valid_speed_baset_mode(struct xgbe_phy_data *phy_data, +static bool xgbe_phy_valid_speed_baset_mode(struct xgbe_prv_data *pdata, int speed) { + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int ver; + switch (speed) { + case SPEED_10: + /* Supported in ver >= 30H */ + ver = XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER); + return (ver >= 0x30) ? true : false; case SPEED_100: case SPEED_1000: return true; @@ -2536,10 +2592,17 @@ static bool xgbe_phy_valid_speed_baset_mode(struct xgbe_phy_data *phy_data, } } -static bool xgbe_phy_valid_speed_sfp_mode(struct xgbe_phy_data *phy_data, +static bool xgbe_phy_valid_speed_sfp_mode(struct xgbe_prv_data *pdata, int speed) { + struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int ver; + switch (speed) { + case SPEED_10: + /* Supported in ver >= 30H */ + ver = XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER); + return (ver >= 0x30) && (phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000); case SPEED_100: return (phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000); case SPEED_1000: @@ -2586,12 +2649,12 @@ static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed) case XGBE_PORT_MODE_1000BASE_T: case XGBE_PORT_MODE_NBASE_T: case XGBE_PORT_MODE_10GBASE_T: - return xgbe_phy_valid_speed_baset_mode(phy_data, speed); + return xgbe_phy_valid_speed_baset_mode(pdata, speed); case XGBE_PORT_MODE_1000BASE_X: case XGBE_PORT_MODE_10GBASE_R: return xgbe_phy_valid_speed_basex_mode(phy_data, speed); case XGBE_PORT_MODE_SFP: - return xgbe_phy_valid_speed_sfp_mode(phy_data, speed); + return xgbe_phy_valid_speed_sfp_mode(pdata, speed); default: return false; } @@ -2862,6 +2925,12 @@ static int xgbe_phy_mdio_reset_setup(struct xgbe_prv_data *pdata) static bool xgbe_phy_port_mode_mismatch(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; + unsigned int ver; + + /* 10 Mbps speed is not supported in ver < 30H */ + ver = XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER); + if (ver < 0x30 && (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10)) + return true; switch (phy_data->port_mode) { case XGBE_PORT_MODE_BACKPLANE: @@ -2875,7 +2944,8 @@ static bool xgbe_phy_port_mode_mismatch(struct xgbe_prv_data *pdata) return false; break; case XGBE_PORT_MODE_1000BASE_T: - if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || + if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)) return false; break; @@ -2884,13 +2954,15 @@ static bool xgbe_phy_port_mode_mismatch(struct xgbe_prv_data *pdata) return false; break; case XGBE_PORT_MODE_NBASE_T: - if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || + if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) || (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_2500)) return false; break; case XGBE_PORT_MODE_10GBASE_T: - if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || + if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) || (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)) return false; @@ -2900,7 +2972,8 @@ static bool xgbe_phy_port_mode_mismatch(struct xgbe_prv_data *pdata) return false; break; case XGBE_PORT_MODE_SFP: - if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || + if ((phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10) || + (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) || (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) || (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)) return false; @@ -3269,6 +3342,10 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata) XGBE_SET_SUP(lks, Pause); XGBE_SET_SUP(lks, Asym_Pause); XGBE_SET_SUP(lks, TP); + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10) { + XGBE_SET_SUP(lks, 10baseT_Full); + phy_data->start_mode = XGBE_MODE_SGMII_10; + } if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) { XGBE_SET_SUP(lks, 100baseT_Full); phy_data->start_mode = XGBE_MODE_SGMII_100; @@ -3299,6 +3376,10 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata) XGBE_SET_SUP(lks, Pause); XGBE_SET_SUP(lks, Asym_Pause); XGBE_SET_SUP(lks, TP); + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10) { + XGBE_SET_SUP(lks, 10baseT_Full); + phy_data->start_mode = XGBE_MODE_SGMII_10; + } if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) { XGBE_SET_SUP(lks, 100baseT_Full); phy_data->start_mode = XGBE_MODE_SGMII_100; @@ -3321,6 +3402,10 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata) XGBE_SET_SUP(lks, Pause); XGBE_SET_SUP(lks, Asym_Pause); XGBE_SET_SUP(lks, TP); + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10) { + XGBE_SET_SUP(lks, 10baseT_Full); + phy_data->start_mode = XGBE_MODE_SGMII_10; + } if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) { XGBE_SET_SUP(lks, 100baseT_Full); phy_data->start_mode = XGBE_MODE_SGMII_100; @@ -3361,6 +3446,8 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata) XGBE_SET_SUP(lks, Asym_Pause); XGBE_SET_SUP(lks, TP); XGBE_SET_SUP(lks, FIBRE); + if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10) + phy_data->start_mode = XGBE_MODE_SGMII_10; if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) phy_data->start_mode = XGBE_MODE_SGMII_100; if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 71f24cb47935..da37476a2848 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -293,6 +293,7 @@ #define XGBE_SGMII_AN_LINK_STATUS BIT(1) #define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3)) +#define XGBE_SGMII_AN_LINK_SPEED_10 0x00 #define XGBE_SGMII_AN_LINK_SPEED_100 0x04 #define XGBE_SGMII_AN_LINK_SPEED_1000 0x08 #define XGBE_SGMII_AN_LINK_DUPLEX BIT(4) @@ -594,6 +595,7 @@ enum xgbe_mode { XGBE_MODE_KX_2500, XGBE_MODE_KR, XGBE_MODE_X, + XGBE_MODE_SGMII_10, XGBE_MODE_SGMII_100, XGBE_MODE_SGMII_1000, XGBE_MODE_SFI, diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index d30d11872719..306393f8eeca 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1905,7 +1905,6 @@ static void alx_remove(struct pci_dev *pdev) free_netdev(alx->dev); } -#ifdef CONFIG_PM_SLEEP static int alx_suspend(struct device *dev) { struct alx_priv *alx = dev_get_drvdata(dev); @@ -1951,12 +1950,7 @@ unlock: return err; } -static SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume); -#define ALX_PM_OPS (&alx_pm_ops) -#else -#define ALX_PM_OPS NULL -#endif - +static DEFINE_SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume); static pci_ers_result_t alx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) @@ -2055,7 +2049,7 @@ static struct pci_driver alx_driver = { .probe = alx_probe, .remove = alx_remove, .err_handler = &alx_err_handlers, - .driver.pm = ALX_PM_OPS, + .driver.pm = pm_sleep_ptr(&alx_pm_ops), }; module_pci_driver(alx_driver); diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 3a79ead5219a..5ad0b259e623 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1305,6 +1305,10 @@ static int enetc_xdp_frame_to_xdp_tx_swbd(struct enetc_bdr *tx_ring, xdp_tx_swbd->xdp_frame = NULL; n++; + + if (!xdp_frame_has_frags(xdp_frame)) + goto out; + xdp_tx_swbd = &xdp_tx_arr[n]; shinfo = xdp_get_shared_info_from_frame(xdp_frame); @@ -1334,7 +1338,7 @@ static int enetc_xdp_frame_to_xdp_tx_swbd(struct enetc_bdr *tx_ring, n++; xdp_tx_swbd = &xdp_tx_arr[n]; } - +out: xdp_tx_arr[n - 1].is_eof = true; xdp_tx_arr[n - 1].xdp_frame = xdp_frame; @@ -1390,16 +1394,12 @@ static void enetc_map_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i, { struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size); void *hard_start = page_address(rx_swbd->page) + rx_swbd->page_offset; - struct skb_shared_info *shinfo; /* To be used for XDP_TX */ rx_swbd->len = size; xdp_prepare_buff(xdp_buff, hard_start - rx_ring->buffer_offset, rx_ring->buffer_offset, size, false); - - shinfo = xdp_get_shared_info_from_buff(xdp_buff); - shinfo->nr_frags = 0; } static void enetc_add_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i, @@ -1407,11 +1407,23 @@ static void enetc_add_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i, { struct skb_shared_info *shinfo = xdp_get_shared_info_from_buff(xdp_buff); struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size); - skb_frag_t *frag = &shinfo->frags[shinfo->nr_frags]; + skb_frag_t *frag; /* To be used for XDP_TX */ rx_swbd->len = size; + if (!xdp_buff_has_frags(xdp_buff)) { + xdp_buff_set_frags_flag(xdp_buff); + shinfo->xdp_frags_size = size; + shinfo->nr_frags = 0; + } else { + shinfo->xdp_frags_size += size; + } + + if (page_is_pfmemalloc(rx_swbd->page)) + xdp_buff_set_frag_pfmemalloc(xdp_buff); + + frag = &shinfo->frags[shinfo->nr_frags]; skb_frag_off_set(frag, rx_swbd->page_offset); skb_frag_size_set(frag, size); __skb_frag_set_page(frag, rx_swbd->page); @@ -1584,20 +1596,6 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, } break; case XDP_REDIRECT: - /* xdp_return_frame does not support S/G in the sense - * that it leaks the fragments (__xdp_return should not - * call page_frag_free only for the initial buffer). - * Until XDP_REDIRECT gains support for S/G let's keep - * the code structure in place, but dead. We drop the - * S/G frames ourselves to avoid memory leaks which - * would otherwise leave the kernel OOM. - */ - if (unlikely(cleaned_cnt - orig_cleaned_cnt != 1)) { - enetc_xdp_drop(rx_ring, orig_i, i); - rx_ring->stats.xdp_redirect_sg++; - break; - } - err = xdp_do_redirect(rx_ring->ndev, &xdp_buff, prog); if (unlikely(err)) { enetc_xdp_drop(rx_ring, orig_i, i); diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index c6d8cc15c270..416e4138dbaf 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -70,7 +70,6 @@ struct enetc_ring_stats { unsigned int xdp_tx_drops; unsigned int xdp_redirect; unsigned int xdp_redirect_failures; - unsigned int xdp_redirect_sg; unsigned int recycles; unsigned int recycle_failures; unsigned int win_drop; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index c8369e3752b0..d45f305eb03c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -197,7 +197,6 @@ static const char rx_ring_stats[][ETH_GSTRING_LEN] = { "Rx ring %2d recycle failures", "Rx ring %2d redirects", "Rx ring %2d redirect failures", - "Rx ring %2d redirect S/G", }; static const char tx_ring_stats[][ETH_GSTRING_LEN] = { @@ -291,7 +290,6 @@ static void enetc_get_ethtool_stats(struct net_device *ndev, data[o++] = priv->rx_ring[i]->stats.recycle_failures; data[o++] = priv->rx_ring[i]->stats.xdp_redirect; data[o++] = priv->rx_ring[i]->stats.xdp_redirect_failures; - data[o++] = priv->rx_ring[i]->stats.xdp_redirect_sg; } if (!enetc_si_is_pf(priv->si)) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 644f3c963730..e6238e53940d 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1987,47 +1987,74 @@ static int fec_enet_mdio_wait(struct fec_enet_private *fep) return ret; } -static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) +static int fec_enet_mdio_read_c22(struct mii_bus *bus, int mii_id, int regnum) { struct fec_enet_private *fep = bus->priv; struct device *dev = &fep->pdev->dev; int ret = 0, frame_start, frame_addr, frame_op; - bool is_c45 = !!(regnum & MII_ADDR_C45); ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; - if (is_c45) { - frame_start = FEC_MMFR_ST_C45; + /* C22 read */ + frame_op = FEC_MMFR_OP_READ; + frame_start = FEC_MMFR_ST; + frame_addr = regnum; - /* write address */ - frame_addr = (regnum >> 16); - writel(frame_start | FEC_MMFR_OP_ADDR_WRITE | - FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(frame_addr) | - FEC_MMFR_TA | (regnum & 0xFFFF), - fep->hwp + FEC_MII_DATA); + /* start a read op */ + writel(frame_start | frame_op | + FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(frame_addr) | + FEC_MMFR_TA, fep->hwp + FEC_MII_DATA); - /* wait for end of transfer */ - ret = fec_enet_mdio_wait(fep); - if (ret) { - netdev_err(fep->netdev, "MDIO address write timeout\n"); - goto out; - } + /* wait for end of transfer */ + ret = fec_enet_mdio_wait(fep); + if (ret) { + netdev_err(fep->netdev, "MDIO read timeout\n"); + goto out; + } - frame_op = FEC_MMFR_OP_READ_C45; + ret = FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); - } else { - /* C22 read */ - frame_op = FEC_MMFR_OP_READ; - frame_start = FEC_MMFR_ST; - frame_addr = regnum; +out: + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + return ret; +} + +static int fec_enet_mdio_read_c45(struct mii_bus *bus, int mii_id, + int devad, int regnum) +{ + struct fec_enet_private *fep = bus->priv; + struct device *dev = &fep->pdev->dev; + int ret = 0, frame_start, frame_op; + + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + return ret; + + frame_start = FEC_MMFR_ST_C45; + + /* write address */ + writel(frame_start | FEC_MMFR_OP_ADDR_WRITE | + FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(devad) | + FEC_MMFR_TA | (regnum & 0xFFFF), + fep->hwp + FEC_MII_DATA); + + /* wait for end of transfer */ + ret = fec_enet_mdio_wait(fep); + if (ret) { + netdev_err(fep->netdev, "MDIO address write timeout\n"); + goto out; } + frame_op = FEC_MMFR_OP_READ_C45; + /* start a read op */ writel(frame_start | frame_op | - FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(frame_addr) | - FEC_MMFR_TA, fep->hwp + FEC_MII_DATA); + FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(devad) | + FEC_MMFR_TA, fep->hwp + FEC_MII_DATA); /* wait for end of transfer */ ret = fec_enet_mdio_wait(fep); @@ -2045,45 +2072,69 @@ out: return ret; } -static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, - u16 value) +static int fec_enet_mdio_write_c22(struct mii_bus *bus, int mii_id, int regnum, + u16 value) { struct fec_enet_private *fep = bus->priv; struct device *dev = &fep->pdev->dev; int ret, frame_start, frame_addr; - bool is_c45 = !!(regnum & MII_ADDR_C45); ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret; - if (is_c45) { - frame_start = FEC_MMFR_ST_C45; + /* C22 write */ + frame_start = FEC_MMFR_ST; + frame_addr = regnum; - /* write address */ - frame_addr = (regnum >> 16); - writel(frame_start | FEC_MMFR_OP_ADDR_WRITE | - FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(frame_addr) | - FEC_MMFR_TA | (regnum & 0xFFFF), - fep->hwp + FEC_MII_DATA); + /* start a write op */ + writel(frame_start | FEC_MMFR_OP_WRITE | + FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(frame_addr) | + FEC_MMFR_TA | FEC_MMFR_DATA(value), + fep->hwp + FEC_MII_DATA); - /* wait for end of transfer */ - ret = fec_enet_mdio_wait(fep); - if (ret) { - netdev_err(fep->netdev, "MDIO address write timeout\n"); - goto out; - } - } else { - /* C22 write */ - frame_start = FEC_MMFR_ST; - frame_addr = regnum; + /* wait for end of transfer */ + ret = fec_enet_mdio_wait(fep); + if (ret) + netdev_err(fep->netdev, "MDIO write timeout\n"); + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + + return ret; +} + +static int fec_enet_mdio_write_c45(struct mii_bus *bus, int mii_id, + int devad, int regnum, u16 value) +{ + struct fec_enet_private *fep = bus->priv; + struct device *dev = &fep->pdev->dev; + int ret, frame_start; + + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + return ret; + + frame_start = FEC_MMFR_ST_C45; + + /* write address */ + writel(frame_start | FEC_MMFR_OP_ADDR_WRITE | + FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(devad) | + FEC_MMFR_TA | (regnum & 0xFFFF), + fep->hwp + FEC_MII_DATA); + + /* wait for end of transfer */ + ret = fec_enet_mdio_wait(fep); + if (ret) { + netdev_err(fep->netdev, "MDIO address write timeout\n"); + goto out; } /* start a write op */ writel(frame_start | FEC_MMFR_OP_WRITE | - FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(frame_addr) | - FEC_MMFR_TA | FEC_MMFR_DATA(value), - fep->hwp + FEC_MII_DATA); + FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(devad) | + FEC_MMFR_TA | FEC_MMFR_DATA(value), + fep->hwp + FEC_MII_DATA); /* wait for end of transfer */ ret = fec_enet_mdio_wait(fep); @@ -2381,8 +2432,10 @@ static int fec_enet_mii_init(struct platform_device *pdev) } fep->mii_bus->name = "fec_enet_mii_bus"; - fep->mii_bus->read = fec_enet_mdio_read; - fep->mii_bus->write = fec_enet_mdio_write; + fep->mii_bus->read = fec_enet_mdio_read_c22; + fep->mii_bus->write = fec_enet_mdio_write_c22; + fep->mii_bus->read_c45 = fec_enet_mdio_read_c45; + fep->mii_bus->write_c45 = fec_enet_mdio_write_c45; snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", pdev->name, fep->dev_id + 1); fep->mii_bus->priv = fep; diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index d7d39a58cd80..8b5a4cd8ff08 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -128,30 +128,49 @@ static int xgmac_wait_until_done(struct device *dev, return 0; } -/* - * Write value to the PHY for this device to the register at regnum,waiting - * until the write is done before it returns. All PHY configuration has to be - * done through the TSEC1 MIIM regs. - */ -static int xgmac_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value) +static int xgmac_mdio_write_c22(struct mii_bus *bus, int phy_id, int regnum, + u16 value) { struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv; struct tgec_mdio_controller __iomem *regs = priv->mdio_base; - uint16_t dev_addr; + bool endian = priv->is_little_endian; + u16 dev_addr = regnum & 0x1f; u32 mdio_ctl, mdio_stat; int ret; + + mdio_stat = xgmac_read32(®s->mdio_stat, endian); + mdio_stat &= ~MDIO_STAT_ENC; + xgmac_write32(mdio_stat, ®s->mdio_stat, endian); + + ret = xgmac_wait_until_free(&bus->dev, regs, endian); + if (ret) + return ret; + + /* Set the port and dev addr */ + mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr); + xgmac_write32(mdio_ctl, ®s->mdio_ctl, endian); + + /* Write the value to the register */ + xgmac_write32(MDIO_DATA(value), ®s->mdio_data, endian); + + ret = xgmac_wait_until_done(&bus->dev, regs, endian); + if (ret) + return ret; + + return 0; +} + +static int xgmac_mdio_write_c45(struct mii_bus *bus, int phy_id, int dev_addr, + int regnum, u16 value) +{ + struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv; + struct tgec_mdio_controller __iomem *regs = priv->mdio_base; bool endian = priv->is_little_endian; + u32 mdio_ctl, mdio_stat; + int ret; mdio_stat = xgmac_read32(®s->mdio_stat, endian); - if (regnum & MII_ADDR_C45) { - /* Clause 45 (ie 10G) */ - dev_addr = (regnum >> 16) & 0x1f; - mdio_stat |= MDIO_STAT_ENC; - } else { - /* Clause 22 (ie 1G) */ - dev_addr = regnum & 0x1f; - mdio_stat &= ~MDIO_STAT_ENC; - } + mdio_stat |= MDIO_STAT_ENC; xgmac_write32(mdio_stat, ®s->mdio_stat, endian); @@ -164,13 +183,11 @@ static int xgmac_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 val xgmac_write32(mdio_ctl, ®s->mdio_ctl, endian); /* Set the register address */ - if (regnum & MII_ADDR_C45) { - xgmac_write32(regnum & 0xffff, ®s->mdio_addr, endian); + xgmac_write32(regnum & 0xffff, ®s->mdio_addr, endian); - ret = xgmac_wait_until_free(&bus->dev, regs, endian); - if (ret) - return ret; - } + ret = xgmac_wait_until_free(&bus->dev, regs, endian); + if (ret) + return ret; /* Write the value to the register */ xgmac_write32(MDIO_DATA(value), ®s->mdio_data, endian); @@ -182,31 +199,82 @@ static int xgmac_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 val return 0; } -/* - * Reads from register regnum in the PHY for device dev, returning the value. +/* Reads from register regnum in the PHY for device dev, returning the value. * Clears miimcom first. All PHY configuration has to be done through the * TSEC1 MIIM regs. */ -static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) +static int xgmac_mdio_read_c22(struct mii_bus *bus, int phy_id, int regnum) { struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv; struct tgec_mdio_controller __iomem *regs = priv->mdio_base; + bool endian = priv->is_little_endian; + u16 dev_addr = regnum & 0x1f; unsigned long flags; - uint16_t dev_addr; uint32_t mdio_stat; uint32_t mdio_ctl; int ret; - bool endian = priv->is_little_endian; mdio_stat = xgmac_read32(®s->mdio_stat, endian); - if (regnum & MII_ADDR_C45) { - dev_addr = (regnum >> 16) & 0x1f; - mdio_stat |= MDIO_STAT_ENC; + mdio_stat &= ~MDIO_STAT_ENC; + xgmac_write32(mdio_stat, ®s->mdio_stat, endian); + + ret = xgmac_wait_until_free(&bus->dev, regs, endian); + if (ret) + return ret; + + /* Set the Port and Device Addrs */ + mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr); + xgmac_write32(mdio_ctl, ®s->mdio_ctl, endian); + + if (priv->has_a009885) + /* Once the operation completes, i.e. MDIO_STAT_BSY clears, we + * must read back the data register within 16 MDC cycles. + */ + local_irq_save(flags); + + /* Initiate the read */ + xgmac_write32(mdio_ctl | MDIO_CTL_READ, ®s->mdio_ctl, endian); + + ret = xgmac_wait_until_done(&bus->dev, regs, endian); + if (ret) + goto irq_restore; + + /* Return all Fs if nothing was there */ + if ((xgmac_read32(®s->mdio_stat, endian) & MDIO_STAT_RD_ER) && + !priv->has_a011043) { + dev_dbg(&bus->dev, + "Error while reading PHY%d reg at %d.%d\n", + phy_id, dev_addr, regnum); + ret = 0xffff; } else { - dev_addr = regnum & 0x1f; - mdio_stat &= ~MDIO_STAT_ENC; + ret = xgmac_read32(®s->mdio_data, endian) & 0xffff; + dev_dbg(&bus->dev, "read %04x\n", ret); } +irq_restore: + if (priv->has_a009885) + local_irq_restore(flags); + + return ret; +} + +/* Reads from register regnum in the PHY for device dev, returning the value. + * Clears miimcom first. All PHY configuration has to be done through the + * TSEC1 MIIM regs. + */ +static int xgmac_mdio_read_c45(struct mii_bus *bus, int phy_id, int dev_addr, + int regnum) +{ + struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv; + struct tgec_mdio_controller __iomem *regs = priv->mdio_base; + bool endian = priv->is_little_endian; + u32 mdio_stat, mdio_ctl; + unsigned long flags; + int ret; + + mdio_stat = xgmac_read32(®s->mdio_stat, endian); + mdio_stat |= MDIO_STAT_ENC; + xgmac_write32(mdio_stat, ®s->mdio_stat, endian); ret = xgmac_wait_until_free(&bus->dev, regs, endian); @@ -218,13 +286,11 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) xgmac_write32(mdio_ctl, ®s->mdio_ctl, endian); /* Set the register address */ - if (regnum & MII_ADDR_C45) { - xgmac_write32(regnum & 0xffff, ®s->mdio_addr, endian); + xgmac_write32(regnum & 0xffff, ®s->mdio_addr, endian); - ret = xgmac_wait_until_free(&bus->dev, regs, endian); - if (ret) - return ret; - } + ret = xgmac_wait_until_free(&bus->dev, regs, endian); + if (ret) + return ret; if (priv->has_a009885) /* Once the operation completes, i.e. MDIO_STAT_BSY clears, we @@ -326,8 +392,10 @@ static int xgmac_mdio_probe(struct platform_device *pdev) return -ENOMEM; bus->name = "Freescale XGMAC MDIO Bus"; - bus->read = xgmac_mdio_read; - bus->write = xgmac_mdio_write; + bus->read = xgmac_mdio_read_c22; + bus->write = xgmac_mdio_write_c22; + bus->read_c45 = xgmac_mdio_read_c45; + bus->write_c45 = xgmac_mdio_write_c45; bus->parent = &pdev->dev; bus->probe_capabilities = MDIOBUS_C22_C45; snprintf(bus->id, MII_BUS_ID_SIZE, "%pa", &res->start); diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 59e82d131d88..721f86fd5802 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -110,9 +110,9 @@ static const char e1000_gstrings_test[][ETH_GSTRING_LEN] = { static int e1000_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { + u32 speed, supported, advertising, lp_advertising, lpa_t; struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - u32 speed, supported, advertising; if (hw->phy.media_type == e1000_media_type_copper) { supported = (SUPPORTED_10baseT_Half | @@ -120,7 +120,9 @@ static int e1000_get_link_ksettings(struct net_device *netdev, SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_1000baseT_Full | + SUPPORTED_Asym_Pause | SUPPORTED_Autoneg | + SUPPORTED_Pause | SUPPORTED_TP); if (hw->phy.type == e1000_phy_ife) supported &= ~SUPPORTED_1000baseT_Full; @@ -192,10 +194,16 @@ static int e1000_get_link_ksettings(struct net_device *netdev, if (hw->phy.media_type != e1000_media_type_copper) cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; + lpa_t = mii_stat1000_to_ethtool_lpa_t(adapter->phy_regs.stat1000); + lp_advertising = lpa_t | + mii_lpa_to_ethtool_lpa_t(adapter->phy_regs.lpa); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, supported); ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, advertising); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising, + lp_advertising); return 0; } diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index 060b263348ce..08c3d477dd6f 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -2,6 +2,7 @@ /* Copyright(c) 1999 - 2018 Intel Corporation. */ #include "e1000.h" +#include <linux/ethtool.h> static s32 e1000_wait_autoneg(struct e1000_hw *hw); static s32 e1000_access_phy_wakeup_reg_bm(struct e1000_hw *hw, u32 offset, @@ -1011,6 +1012,8 @@ static s32 e1000_phy_setup_autoneg(struct e1000_hw *hw) */ mii_autoneg_adv_reg &= ~(ADVERTISE_PAUSE_ASYM | ADVERTISE_PAUSE_CAP); + phy->autoneg_advertised &= + ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause); break; case e1000_fc_rx_pause: /* Rx Flow control is enabled, and Tx Flow control is @@ -1024,6 +1027,8 @@ static s32 e1000_phy_setup_autoneg(struct e1000_hw *hw) */ mii_autoneg_adv_reg |= (ADVERTISE_PAUSE_ASYM | ADVERTISE_PAUSE_CAP); + phy->autoneg_advertised |= + (ADVERTISED_Pause | ADVERTISED_Asym_Pause); break; case e1000_fc_tx_pause: /* Tx Flow control is enabled, and Rx Flow control is @@ -1031,6 +1036,8 @@ static s32 e1000_phy_setup_autoneg(struct e1000_hw *hw) */ mii_autoneg_adv_reg |= ADVERTISE_PAUSE_ASYM; mii_autoneg_adv_reg &= ~ADVERTISE_PAUSE_CAP; + phy->autoneg_advertised |= ADVERTISED_Asym_Pause; + phy->autoneg_advertised &= ~ADVERTISED_Pause; break; case e1000_fc_full: /* Flow control (both Rx and Tx) is enabled by a software @@ -1038,6 +1045,8 @@ static s32 e1000_phy_setup_autoneg(struct e1000_hw *hw) */ mii_autoneg_adv_reg |= (ADVERTISE_PAUSE_ASYM | ADVERTISE_PAUSE_CAP); + phy->autoneg_advertised |= + (ADVERTISED_Pause | ADVERTISED_Asym_Pause); break; default: e_dbg("Flow control param set incorrectly\n"); diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index a15927e77272..a1d815af507d 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -396,6 +396,35 @@ void igc_rx_fifo_flush_base(struct igc_hw *hw) rd32(IGC_MPC); } +bool igc_is_device_id_i225(struct igc_hw *hw) +{ + switch (hw->device_id) { + case IGC_DEV_ID_I225_LM: + case IGC_DEV_ID_I225_V: + case IGC_DEV_ID_I225_I: + case IGC_DEV_ID_I225_K: + case IGC_DEV_ID_I225_K2: + case IGC_DEV_ID_I225_LMVP: + case IGC_DEV_ID_I225_IT: + return true; + default: + return false; + } +} + +bool igc_is_device_id_i226(struct igc_hw *hw) +{ + switch (hw->device_id) { + case IGC_DEV_ID_I226_LM: + case IGC_DEV_ID_I226_V: + case IGC_DEV_ID_I226_K: + case IGC_DEV_ID_I226_IT: + return true; + default: + return false; + } +} + static struct igc_mac_operations igc_mac_ops_base = { .init_hw = igc_init_hw_base, .check_for_link = igc_check_for_copper_link, diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h index ce530f5fd7bd..7a992befca24 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.h +++ b/drivers/net/ethernet/intel/igc/igc_base.h @@ -7,6 +7,8 @@ /* forward declaration */ void igc_rx_fifo_flush_base(struct igc_hw *hw); void igc_power_down_phy_copper_base(struct igc_hw *hw); +bool igc_is_device_id_i225(struct igc_hw *hw); +bool igc_is_device_id_i226(struct igc_hw *hw); /* Transmit Descriptor - Advanced */ union igc_adv_tx_desc { diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index e9747ec5ac0b..9dec3563ce3a 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -524,6 +524,7 @@ /* Transmit Scheduling */ #define IGC_TQAVCTRL_TRANSMIT_MODE_TSN 0x00000001 #define IGC_TQAVCTRL_ENHANCED_QAV 0x00000008 +#define IGC_TQAVCTRL_FUTSCDDIS 0x00000080 #define IGC_TXQCTL_QUEUE_MODE_LAUNCHT 0x00000001 #define IGC_TXQCTL_STRICT_CYCLE 0x00000002 diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 44b1740dc098..e86b15efaeb8 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5958,6 +5958,7 @@ static bool validate_schedule(struct igc_adapter *adapter, const struct tc_taprio_qopt_offload *qopt) { int queue_uses[IGC_MAX_TX_QUEUES] = { }; + struct igc_hw *hw = &adapter->hw; struct timespec64 now; size_t n; @@ -5970,8 +5971,10 @@ static bool validate_schedule(struct igc_adapter *adapter, * in the future, it will hold all the packets until that * time, causing a lot of TX Hangs, so to avoid that, we * reject schedules that would start in the future. + * Note: Limitation above is no longer in i226. */ - if (!is_base_time_past(qopt->base_time, &now)) + if (!is_base_time_past(qopt->base_time, &now) && + igc_is_device_id_i225(hw)) return false; for (n = 0; n < qopt->num_entries; n++) { @@ -6041,6 +6044,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, struct tc_taprio_qopt_offload *qopt) { bool queue_configured[IGC_MAX_TX_QUEUES] = { }; + struct igc_hw *hw = &adapter->hw; u32 start_time = 0, end_time = 0; size_t n; int i; @@ -6053,7 +6057,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, if (qopt->base_time < 0) return -ERANGE; - if (adapter->base_time) + if (igc_is_device_id_i225(hw) && adapter->base_time) return -EALREADY; if (!validate_schedule(adapter, qopt)) diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index bb10d7b65232..a386c8d61dbf 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Intel Corporation */ #include "igc.h" +#include "igc_hw.h" #include "igc_tsn.h" static bool is_any_launchtime(struct igc_adapter *adapter) @@ -92,7 +93,8 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) tqavctrl = rd32(IGC_TQAVCTRL); tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN | - IGC_TQAVCTRL_ENHANCED_QAV); + IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_FUTSCDDIS); + wr32(IGC_TQAVCTRL, tqavctrl); for (i = 0; i < adapter->num_tx_queues; i++) { @@ -117,20 +119,10 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) ktime_t base_time, systim; int i; - cycle = adapter->cycle_time; - base_time = adapter->base_time; - wr32(IGC_TSAUXC, 0); wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN); wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN); - tqavctrl = rd32(IGC_TQAVCTRL); - tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV; - wr32(IGC_TQAVCTRL, tqavctrl); - - wr32(IGC_QBVCYCLET_S, cycle); - wr32(IGC_QBVCYCLET, cycle); - for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; u32 txqctl = 0; @@ -233,21 +225,46 @@ skip_cbs: wr32(IGC_TXQCTL(i), txqctl); } + tqavctrl = rd32(IGC_TQAVCTRL) & ~IGC_TQAVCTRL_FUTSCDDIS; + tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV; + + cycle = adapter->cycle_time; + base_time = adapter->base_time; + nsec = rd32(IGC_SYSTIML); sec = rd32(IGC_SYSTIMH); systim = ktime_set(sec, nsec); - if (ktime_compare(systim, base_time) > 0) { - s64 n; + s64 n = div64_s64(ktime_sub_ns(systim, base_time), cycle); - n = div64_s64(ktime_sub_ns(systim, base_time), cycle); base_time = ktime_add_ns(base_time, (n + 1) * cycle); + } else { + /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit + * has to be configured before the cycle time and base time. + * Tx won't hang if there is a GCL is already running, + * so in this case we don't need to set FutScdDis. + */ + if (igc_is_device_id_i226(hw) && + !(rd32(IGC_BASET_H) || rd32(IGC_BASET_L))) + tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS; } - baset_h = div_s64_rem(base_time, NSEC_PER_SEC, &baset_l); + wr32(IGC_TQAVCTRL, tqavctrl); + + wr32(IGC_QBVCYCLET_S, cycle); + wr32(IGC_QBVCYCLET, cycle); + baset_h = div_s64_rem(base_time, NSEC_PER_SEC, &baset_l); wr32(IGC_BASET_H, baset_h); + + /* In i226, Future base time is only supported when FutScdDis bit + * is enabled and only active for re-configuration. + * In this case, initialize the base time with zero to create + * "re-configuration" scenario then only set the desired base time. + */ + if (tqavctrl & IGC_TQAVCTRL_FUTSCDDIS) + wr32(IGC_BASET_L, 0); wr32(IGC_BASET_L, baset_l); return 0; @@ -274,17 +291,14 @@ int igc_tsn_reset(struct igc_adapter *adapter) int igc_tsn_offload_apply(struct igc_adapter *adapter) { - int err; + struct igc_hw *hw = &adapter->hw; - if (netif_running(adapter->netdev)) { + if (netif_running(adapter->netdev) && igc_is_device_id_i225(hw)) { schedule_work(&adapter->reset_task); return 0; } - err = igc_tsn_enable_offload(adapter); - if (err < 0) - return err; + igc_tsn_reset(adapter); - adapter->flags = igc_tsn_new_flags(adapter); return 0; } diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index ef878973b859..2d654a40af13 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -204,21 +204,17 @@ static const struct orion_mdio_ops orion_mdio_xsmi_ops = { .poll_interval_max = MVMDIO_XSMI_POLL_INTERVAL_MAX, }; -static int orion_mdio_xsmi_read(struct mii_bus *bus, int mii_id, - int regnum) +static int orion_mdio_xsmi_read_c45(struct mii_bus *bus, int mii_id, + int dev_addr, int regnum) { struct orion_mdio_dev *dev = bus->priv; - u16 dev_addr = (regnum >> 16) & GENMASK(4, 0); int ret; - if (!(regnum & MII_ADDR_C45)) - return -EOPNOTSUPP; - ret = orion_mdio_wait_ready(&orion_mdio_xsmi_ops, bus); if (ret < 0) return ret; - writel(regnum & GENMASK(15, 0), dev->regs + MVMDIO_XSMI_ADDR_REG); + writel(regnum, dev->regs + MVMDIO_XSMI_ADDR_REG); writel((mii_id << MVMDIO_XSMI_PHYADDR_SHIFT) | (dev_addr << MVMDIO_XSMI_DEVADDR_SHIFT) | MVMDIO_XSMI_READ_OPERATION, @@ -237,21 +233,17 @@ static int orion_mdio_xsmi_read(struct mii_bus *bus, int mii_id, return readl(dev->regs + MVMDIO_XSMI_MGNT_REG) & GENMASK(15, 0); } -static int orion_mdio_xsmi_write(struct mii_bus *bus, int mii_id, - int regnum, u16 value) +static int orion_mdio_xsmi_write_c45(struct mii_bus *bus, int mii_id, + int dev_addr, int regnum, u16 value) { struct orion_mdio_dev *dev = bus->priv; - u16 dev_addr = (regnum >> 16) & GENMASK(4, 0); int ret; - if (!(regnum & MII_ADDR_C45)) - return -EOPNOTSUPP; - ret = orion_mdio_wait_ready(&orion_mdio_xsmi_ops, bus); if (ret < 0) return ret; - writel(regnum & GENMASK(15, 0), dev->regs + MVMDIO_XSMI_ADDR_REG); + writel(regnum, dev->regs + MVMDIO_XSMI_ADDR_REG); writel((mii_id << MVMDIO_XSMI_PHYADDR_SHIFT) | (dev_addr << MVMDIO_XSMI_DEVADDR_SHIFT) | MVMDIO_XSMI_WRITE_OPERATION | value, @@ -302,8 +294,8 @@ static int orion_mdio_probe(struct platform_device *pdev) bus->write = orion_mdio_smi_write; break; case BUS_TYPE_XSMI: - bus->read = orion_mdio_xsmi_read; - bus->write = orion_mdio_xsmi_write; + bus->read_c45 = orion_mdio_xsmi_read_c45; + bus->write_c45 = orion_mdio_xsmi_write_c45; break; } diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.c b/drivers/net/ethernet/mediatek/mtk_wed_wo.c index a0a39643caf7..d32b86499896 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_wo.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.c @@ -138,7 +138,6 @@ mtk_wed_wo_queue_refill(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q, enum dma_data_direction dir = rx ? DMA_FROM_DEVICE : DMA_TO_DEVICE; int n_buf = 0; - spin_lock_bh(&q->lock); while (q->queued < q->n_desc) { struct mtk_wed_wo_queue_entry *entry; dma_addr_t addr; @@ -172,7 +171,6 @@ mtk_wed_wo_queue_refill(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q, q->queued++; n_buf++; } - spin_unlock_bh(&q->lock); return n_buf; } @@ -316,7 +314,6 @@ mtk_wed_wo_queue_rx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q) { struct page *page; - spin_lock_bh(&q->lock); for (;;) { void *buf = mtk_wed_wo_dequeue(wo, q, NULL, true); @@ -325,7 +322,6 @@ mtk_wed_wo_queue_rx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q) skb_free_frag(buf); } - spin_unlock_bh(&q->lock); if (!q->cache.va) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index c837103a9ee3..c3c8a7148723 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -47,6 +47,25 @@ #define CREATE_TRACE_POINTS #include "diag/cmd_tracepoint.h" +struct mlx5_ifc_mbox_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_mbox_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + enum { CMD_IF_REV = 5, }; @@ -70,6 +89,26 @@ enum { MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10, }; +static u16 in_to_opcode(void *in) +{ + return MLX5_GET(mbox_in, in, opcode); +} + +/* Returns true for opcodes that might be triggered very frequently and throttle + * the command interface. Limit their command slots usage. + */ +static bool mlx5_cmd_is_throttle_opcode(u16 op) +{ + switch (op) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + return true; + } + return false; +} + static struct mlx5_cmd_work_ent * cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in, struct mlx5_cmd_msg *out, void *uout, int uout_size, @@ -91,6 +130,7 @@ cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in, ent->context = context; ent->cmd = cmd; ent->page_queue = page_queue; + ent->op = in_to_opcode(in->first.data); refcount_set(&ent->refcnt, 1); return ent; @@ -752,25 +792,6 @@ static int cmd_status_to_err(u8 status) } } -struct mlx5_ifc_mbox_out_bits { - u8 status[0x8]; - u8 reserved_at_8[0x18]; - - u8 syndrome[0x20]; - - u8 reserved_at_40[0x40]; -}; - -struct mlx5_ifc_mbox_in_bits { - u8 opcode[0x10]; - u8 uid[0x10]; - - u8 reserved_at_20[0x10]; - u8 op_mod[0x10]; - - u8 reserved_at_40[0x40]; -}; - void mlx5_cmd_out_err(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out) { u32 syndrome = MLX5_GET(mbox_out, out, syndrome); @@ -788,7 +809,7 @@ static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) u16 opcode, op_mod; u16 uid; - opcode = MLX5_GET(mbox_in, in, opcode); + opcode = in_to_opcode(in); op_mod = MLX5_GET(mbox_in, in, op_mod); uid = MLX5_GET(mbox_in, in, uid); @@ -800,7 +821,7 @@ int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out) { /* aborted due to PCI error or via reset flow mlx5_cmd_trigger_completions() */ if (err == -ENXIO) { - u16 opcode = MLX5_GET(mbox_in, in, opcode); + u16 opcode = in_to_opcode(in); u32 syndrome; u8 status; @@ -829,9 +850,9 @@ static void dump_command(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent, int input) { struct mlx5_cmd_msg *msg = input ? ent->in : ent->out; - u16 op = MLX5_GET(mbox_in, ent->lay->in, opcode); struct mlx5_cmd_mailbox *next = msg->next; int n = mlx5_calc_cmd_blocks(msg); + u16 op = ent->op; int data_only; u32 offset = 0; int dump_len; @@ -883,11 +904,6 @@ static void dump_command(struct mlx5_core_dev *dev, mlx5_core_dbg(dev, "cmd[%d]: end dump\n", ent->idx); } -static u16 msg_to_opcode(struct mlx5_cmd_msg *in) -{ - return MLX5_GET(mbox_in, in->first.data, opcode); -} - static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); static void cb_timeout_handler(struct work_struct *work) @@ -905,13 +921,13 @@ static void cb_timeout_handler(struct work_struct *work) /* Maybe got handled by eq recover ? */ if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state)) { mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after timeout\n", ent->idx, - mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + mlx5_command_str(ent->op), ent->op); goto out; /* phew, already handled */ } ent->ret = -ETIMEDOUT; mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n", - ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + ent->idx, mlx5_command_str(ent->op), ent->op); mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); out: @@ -985,7 +1001,6 @@ static void cmd_work_handler(struct work_struct *work) ent->lay = lay; memset(lay, 0, sizeof(*lay)); memcpy(lay->in, ent->in->first.data, sizeof(lay->in)); - ent->op = be32_to_cpu(lay->in[0]) >> 16; if (ent->in->next) lay->in_ptr = cpu_to_be64(ent->in->next->dma); lay->inlen = cpu_to_be32(ent->in->len); @@ -1098,12 +1113,12 @@ static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev, */ if (wait_for_completion_timeout(&ent->done, timeout)) { mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after timeout\n", ent->idx, - mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + mlx5_command_str(ent->op), ent->op); return; } mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx, - mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + mlx5_command_str(ent->op), ent->op); ent->ret = -ETIMEDOUT; mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); @@ -1130,12 +1145,10 @@ out_err: if (err == -ETIMEDOUT) { mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", - mlx5_command_str(msg_to_opcode(ent->in)), - msg_to_opcode(ent->in)); + mlx5_command_str(ent->op), ent->op); } else if (err == -ECANCELED) { mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n", - mlx5_command_str(msg_to_opcode(ent->in)), - msg_to_opcode(ent->in)); + mlx5_command_str(ent->op), ent->op); } mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", err, deliv_status_to_str(ent->status), ent->status); @@ -1169,7 +1182,6 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, u8 status = 0; int err = 0; s64 ds; - u16 op; if (callback && page_queue) return -EINVAL; @@ -1209,9 +1221,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, goto out_free; ds = ent->ts2 - ent->ts1; - op = MLX5_GET(mbox_in, in->first.data, opcode); - if (op < MLX5_CMD_OP_MAX) { - stats = &cmd->stats[op]; + if (ent->op < MLX5_CMD_OP_MAX) { + stats = &cmd->stats[ent->op]; spin_lock_irq(&stats->lock); stats->sum += ds; ++stats->n; @@ -1219,7 +1230,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, } mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, "fw exec time for %s is %lld nsec\n", - mlx5_command_str(op), ds); + mlx5_command_str(ent->op), ds); out_free: status = ent->status; @@ -1816,7 +1827,7 @@ cache_miss: static int is_manage_pages(void *in) { - return MLX5_GET(mbox_in, in, opcode) == MLX5_CMD_OP_MANAGE_PAGES; + return in_to_opcode(in) == MLX5_CMD_OP_MANAGE_PAGES; } /* Notes: @@ -1827,8 +1838,9 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size, mlx5_cmd_cbk_t callback, void *context, bool force_polling) { - u16 opcode = MLX5_GET(mbox_in, in, opcode); struct mlx5_cmd_msg *inb, *outb; + u16 opcode = in_to_opcode(in); + bool throttle_op; int pages_queue; gfp_t gfp; u8 token; @@ -1837,13 +1849,21 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) return -ENXIO; + throttle_op = mlx5_cmd_is_throttle_opcode(opcode); + if (throttle_op) { + /* atomic context may not sleep */ + if (callback) + return -EINVAL; + down(&dev->cmd.throttle_sem); + } + pages_queue = is_manage_pages(in); gfp = callback ? GFP_ATOMIC : GFP_KERNEL; inb = alloc_msg(dev, in_size, gfp); if (IS_ERR(inb)) { err = PTR_ERR(inb); - return err; + goto out_up; } token = alloc_token(&dev->cmd); @@ -1877,6 +1897,9 @@ out_out: mlx5_free_cmd_msg(dev, outb); out_in: free_msg(dev, inb); +out_up: + if (throttle_op) + up(&dev->cmd.throttle_sem); return err; } @@ -1950,8 +1973,8 @@ static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, u16 op int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) { int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); - u16 opcode = MLX5_GET(mbox_in, in, opcode); u16 op_mod = MLX5_GET(mbox_in, in, op_mod); + u16 opcode = in_to_opcode(in); return cmd_status_err(dev, err, opcode, op_mod, out); } @@ -1996,8 +2019,8 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) { int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); - u16 opcode = MLX5_GET(mbox_in, in, opcode); u16 op_mod = MLX5_GET(mbox_in, in, op_mod); + u16 opcode = in_to_opcode(in); err = cmd_status_err(dev, err, opcode, op_mod, out); return mlx5_cmd_check(dev, err, in, out); @@ -2049,7 +2072,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, work->ctx = ctx; work->user_callback = callback; - work->opcode = MLX5_GET(mbox_in, in, opcode); + work->opcode = in_to_opcode(in); work->op_mod = MLX5_GET(mbox_in, in, op_mod); work->out = out; if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) @@ -2220,6 +2243,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) sema_init(&cmd->sem, cmd->max_reg_cmds); sema_init(&cmd->pages_sem, 1); + sema_init(&cmd->throttle_sem, DIV_ROUND_UP(cmd->max_reg_cmds, 2)); cmd_h = (u32)((u64)(cmd->dma) >> 32); cmd_l = (u32)(cmd->dma); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 0571e40c6ee5..5b6b0b126e52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -59,6 +59,9 @@ bool mlx5_eth_supported(struct mlx5_core_dev *dev) if (!IS_ENABLED(CONFIG_MLX5_CORE_EN)) return false; + if (mlx5_core_is_management_pf(dev)) + return false; + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return false; @@ -198,6 +201,9 @@ bool mlx5_rdma_supported(struct mlx5_core_dev *dev) if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) return false; + if (mlx5_core_is_management_pf(dev)) + return false; + if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 464eb3a18450..b70e36025d92 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -75,6 +75,10 @@ int mlx5_ec_init(struct mlx5_core_dev *dev) if (!mlx5_core_is_ecpf(dev)) return 0; + /* Management PF don't have a peer PF */ + if (mlx5_core_is_management_pf(dev)) + return 0; + return mlx5_host_pf_init(dev); } @@ -85,6 +89,10 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev) if (!mlx5_core_is_ecpf(dev)) return; + /* Management PF don't have a peer PF */ + if (mlx5_core_is_management_pf(dev)) + return; + mlx5_host_pf_cleanup(dev); err = mlx5_wait_for_pages(dev, &dev->priv.host_pf_pages); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 2d77fb8a8a01..82573ac722d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -247,7 +247,7 @@ struct mlx5e_rx_wqe_ll { }; struct mlx5e_rx_wqe_cyc { - struct mlx5_wqe_data_seg data[0]; + DECLARE_FLEX_ARRAY(struct mlx5_wqe_data_seg, data); }; struct mlx5e_umr_wqe { @@ -968,6 +968,7 @@ struct mlx5e_priv { struct mlx5e_scratchpad scratchpad; struct mlx5e_htb *htb; struct mlx5e_mqprio_rl *mqprio_rl; + struct dentry *dfs_root; }; struct mlx5e_rx_handlers { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 379c6dc9a3be..5233d4daca41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -145,7 +145,8 @@ void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple, struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile, struct mlx5_core_dev *mdev, - bool state_destroy); + bool state_destroy, + struct dentry *dfs_root); void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs); struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs); void mlx5e_fs_set_tc(struct mlx5e_flow_steering *fs, struct mlx5e_tc_table *tc); @@ -189,6 +190,8 @@ int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs, __be16 proto, u16 vid); void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev); +struct dentry *mlx5e_fs_get_debugfs_root(struct mlx5e_flow_steering *fs); + #define fs_err(fs, fmt, ...) \ mlx5_core_err(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 89510cac46c2..505ba41195b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -287,6 +287,78 @@ int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in) return err; } +int mlx5e_port_query_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir, + u8 pool_idx, void *out, int size_out) +{ + u32 in[MLX5_ST_SZ_DW(sbpr_reg)] = {}; + + MLX5_SET(sbpr_reg, in, desc, desc); + MLX5_SET(sbpr_reg, in, dir, dir); + MLX5_SET(sbpr_reg, in, pool, pool_idx); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, size_out, MLX5_REG_SBPR, 0, 0); +} + +int mlx5e_port_set_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir, + u8 pool_idx, u32 infi_size, u32 size) +{ + u32 out[MLX5_ST_SZ_DW(sbpr_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(sbpr_reg)] = {}; + + MLX5_SET(sbpr_reg, in, desc, desc); + MLX5_SET(sbpr_reg, in, dir, dir); + MLX5_SET(sbpr_reg, in, pool, pool_idx); + MLX5_SET(sbpr_reg, in, infi_size, infi_size); + MLX5_SET(sbpr_reg, in, size, size); + MLX5_SET(sbpr_reg, in, mode, 1); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), MLX5_REG_SBPR, 0, 1); +} + +static int mlx5e_port_query_sbcm(struct mlx5_core_dev *mdev, u32 desc, + u8 pg_buff_idx, u8 dir, void *out, + int size_out) +{ + u32 in[MLX5_ST_SZ_DW(sbcm_reg)] = {}; + + MLX5_SET(sbcm_reg, in, desc, desc); + MLX5_SET(sbcm_reg, in, local_port, 1); + MLX5_SET(sbcm_reg, in, pg_buff, pg_buff_idx); + MLX5_SET(sbcm_reg, in, dir, dir); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, size_out, MLX5_REG_SBCM, 0, 0); +} + +int mlx5e_port_set_sbcm(struct mlx5_core_dev *mdev, u32 desc, u8 pg_buff_idx, + u8 dir, u8 infi_size, u32 max_buff, u8 pool_idx) +{ + u32 out[MLX5_ST_SZ_DW(sbcm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(sbcm_reg)] = {}; + u32 min_buff; + int err; + u8 exc; + + err = mlx5e_port_query_sbcm(mdev, desc, pg_buff_idx, dir, out, + sizeof(out)); + if (err) + return err; + + exc = MLX5_GET(sbcm_reg, out, exc); + min_buff = MLX5_GET(sbcm_reg, out, min_buff); + + MLX5_SET(sbcm_reg, in, desc, desc); + MLX5_SET(sbcm_reg, in, local_port, 1); + MLX5_SET(sbcm_reg, in, pg_buff, pg_buff_idx); + MLX5_SET(sbcm_reg, in, dir, dir); + MLX5_SET(sbcm_reg, in, exc, exc); + MLX5_SET(sbcm_reg, in, min_buff, min_buff); + MLX5_SET(sbcm_reg, in, infi_max, infi_size); + MLX5_SET(sbcm_reg, in, max_buff, max_buff); + MLX5_SET(sbcm_reg, in, pool, pool_idx); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), MLX5_REG_SBCM, 0, 1); +} + /* buffer[i]: buffer that priority i mapped to */ int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index 7a7defe60792..3f474e370828 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -57,6 +57,12 @@ u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev); int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); +int mlx5e_port_query_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir, + u8 pool_idx, void *out, int size_out); +int mlx5e_port_set_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir, + u8 pool_idx, u32 infi_size, u32 size); +int mlx5e_port_set_sbcm(struct mlx5_core_dev *mdev, u32 desc, u8 pg_buff_idx, + u8 dir, u8 infi_size, u32 max_buff, u8 pool_idx); int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index c9d5d8d93994..57f4b1b50421 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -73,6 +73,7 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, port_buffer->buffer[i].lossy); } + port_buffer->headroom_size = total_used; port_buffer->port_buffer_size = MLX5_GET(pbmc_reg, out, port_buffer_size) * port_buff_cell_sz; port_buffer->spare_buffer_size = @@ -86,16 +87,204 @@ out: return err; } +struct mlx5e_buffer_pool { + u32 infi_size; + u32 size; + u32 buff_occupancy; +}; + +static int mlx5e_port_query_pool(struct mlx5_core_dev *mdev, + struct mlx5e_buffer_pool *buffer_pool, + u32 desc, u8 dir, u8 pool_idx) +{ + u32 out[MLX5_ST_SZ_DW(sbpr_reg)] = {}; + int err; + + err = mlx5e_port_query_sbpr(mdev, desc, dir, pool_idx, out, + sizeof(out)); + if (err) + return err; + + buffer_pool->size = MLX5_GET(sbpr_reg, out, size); + buffer_pool->infi_size = MLX5_GET(sbpr_reg, out, infi_size); + buffer_pool->buff_occupancy = MLX5_GET(sbpr_reg, out, buff_occupancy); + + return err; +} + +enum { + MLX5_INGRESS_DIR = 0, + MLX5_EGRESS_DIR = 1, +}; + +enum { + MLX5_LOSSY_POOL = 0, + MLX5_LOSSLESS_POOL = 1, +}; + +/* No limit on usage of shared buffer pool (max_buff=0) */ +#define MLX5_SB_POOL_NO_THRESHOLD 0 +/* Shared buffer pool usage threshold when calculated + * dynamically in alpha units. alpha=13 is equivalent to + * HW_alpha of [(1/128) * 2 ^ (alpha-1)] = 32, where HW_alpha + * equates to the following portion of the shared buffer pool: + * [32 / (1 + n * 32)] While *n* is the number of buffers + * that are using the shared buffer pool. + */ +#define MLX5_SB_POOL_THRESHOLD 13 + +/* Shared buffer class management parameters */ +struct mlx5_sbcm_params { + u8 pool_idx; + u8 max_buff; + u8 infi_size; +}; + +static const struct mlx5_sbcm_params sbcm_default = { + .pool_idx = MLX5_LOSSY_POOL, + .max_buff = MLX5_SB_POOL_NO_THRESHOLD, + .infi_size = 0, +}; + +static const struct mlx5_sbcm_params sbcm_lossy = { + .pool_idx = MLX5_LOSSY_POOL, + .max_buff = MLX5_SB_POOL_NO_THRESHOLD, + .infi_size = 1, +}; + +static const struct mlx5_sbcm_params sbcm_lossless = { + .pool_idx = MLX5_LOSSLESS_POOL, + .max_buff = MLX5_SB_POOL_THRESHOLD, + .infi_size = 0, +}; + +static const struct mlx5_sbcm_params sbcm_lossless_no_threshold = { + .pool_idx = MLX5_LOSSLESS_POOL, + .max_buff = MLX5_SB_POOL_NO_THRESHOLD, + .infi_size = 1, +}; + +/** + * select_sbcm_params() - selects the shared buffer pool configuration + * + * @buffer: <input> port buffer to retrieve params of + * @lossless_buff_count: <input> number of lossless buffers in total + * + * The selection is based on the following rules: + * 1. If buffer size is 0, no shared buffer pool is used. + * 2. If buffer is lossy, use lossy shared buffer pool. + * 3. If there are more than 1 lossless buffers, use lossless shared buffer pool + * with threshold. + * 4. If there is only 1 lossless buffer, use lossless shared buffer pool + * without threshold. + * + * @return const struct mlx5_sbcm_params* selected values + */ +static const struct mlx5_sbcm_params * +select_sbcm_params(struct mlx5e_bufferx_reg *buffer, u8 lossless_buff_count) +{ + if (buffer->size == 0) + return &sbcm_default; + + if (buffer->lossy) + return &sbcm_lossy; + + if (lossless_buff_count > 1) + return &sbcm_lossless; + + return &sbcm_lossless_no_threshold; +} + +static int port_update_pool_cfg(struct mlx5_core_dev *mdev, + struct mlx5e_port_buffer *port_buffer) +{ + const struct mlx5_sbcm_params *p; + u8 lossless_buff_count = 0; + int err; + int i; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return 0; + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) + lossless_buff_count += ((port_buffer->buffer[i].size) && + (!(port_buffer->buffer[i].lossy))); + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + p = select_sbcm_params(&port_buffer->buffer[i], lossless_buff_count); + err = mlx5e_port_set_sbcm(mdev, 0, i, + MLX5_INGRESS_DIR, + p->infi_size, + p->max_buff, + p->pool_idx); + if (err) + return err; + } + + return 0; +} + +static int port_update_shared_buffer(struct mlx5_core_dev *mdev, + u32 current_headroom_size, + u32 new_headroom_size) +{ + struct mlx5e_buffer_pool lossless_ipool; + struct mlx5e_buffer_pool lossy_epool; + u32 lossless_ipool_size; + u32 shared_buffer_size; + u32 total_buffer_size; + u32 lossy_epool_size; + int err; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return 0; + + err = mlx5e_port_query_pool(mdev, &lossy_epool, 0, MLX5_EGRESS_DIR, + MLX5_LOSSY_POOL); + if (err) + return err; + + err = mlx5e_port_query_pool(mdev, &lossless_ipool, 0, MLX5_INGRESS_DIR, + MLX5_LOSSLESS_POOL); + if (err) + return err; + + total_buffer_size = current_headroom_size + lossy_epool.size + + lossless_ipool.size; + shared_buffer_size = total_buffer_size - new_headroom_size; + + if (shared_buffer_size < 4) { + pr_err("Requested port buffer is too large, not enough space left for shared buffer\n"); + return -EINVAL; + } + + /* Total shared buffer size is split in a ratio of 3:1 between + * lossy and lossless pools respectively. + */ + lossy_epool_size = (shared_buffer_size / 4) * 3; + lossless_ipool_size = shared_buffer_size / 4; + + mlx5e_port_set_sbpr(mdev, 0, MLX5_EGRESS_DIR, MLX5_LOSSY_POOL, 0, + lossy_epool_size); + mlx5e_port_set_sbpr(mdev, 0, MLX5_INGRESS_DIR, MLX5_LOSSLESS_POOL, 0, + lossless_ipool_size); + return 0; +} + static int port_set_buffer(struct mlx5e_priv *priv, struct mlx5e_port_buffer *port_buffer) { u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; struct mlx5_core_dev *mdev = priv->mdev; int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + u32 new_headroom_size = 0; + u32 current_headroom_size; void *in; int err; int i; + current_headroom_size = port_buffer->headroom_size; + in = kzalloc(sz, GFP_KERNEL); if (!in) return -ENOMEM; @@ -110,6 +299,7 @@ static int port_set_buffer(struct mlx5e_priv *priv, u64 xoff = port_buffer->buffer[i].xoff; u64 xon = port_buffer->buffer[i].xon; + new_headroom_size += size; do_div(size, port_buff_cell_sz); do_div(xoff, port_buff_cell_sz); do_div(xon, port_buff_cell_sz); @@ -119,6 +309,17 @@ static int port_set_buffer(struct mlx5e_priv *priv, MLX5_SET(bufferx_reg, buffer, xon_threshold, xon); } + new_headroom_size /= port_buff_cell_sz; + current_headroom_size /= port_buff_cell_sz; + err = port_update_shared_buffer(priv->mdev, current_headroom_size, + new_headroom_size); + if (err) + return err; + + err = port_update_pool_cfg(priv->mdev, port_buffer); + if (err) + return err; + err = mlx5e_port_set_pbmc(mdev, in); out: kfree(in); @@ -174,6 +375,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, /** * update_buffer_lossy - Update buffer configuration based on pfc + * @mdev: port function core device * @max_mtu: netdev's max_mtu * @pfc_en: <input> current pfc configuration * @buffer: <input> current prio to buffer mapping @@ -192,7 +394,8 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, * @return: 0 if no error, * sets change to true if buffer configuration was modified. */ -static int update_buffer_lossy(unsigned int max_mtu, +static int update_buffer_lossy(struct mlx5_core_dev *mdev, + unsigned int max_mtu, u8 pfc_en, u8 *buffer, u32 xoff, u16 port_buff_cell_sz, struct mlx5e_port_buffer *port_buffer, bool *change) @@ -229,6 +432,10 @@ static int update_buffer_lossy(unsigned int max_mtu, } if (changed) { + err = port_update_pool_cfg(mdev, port_buffer); + if (err) + return err; + err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz); if (err) return err; @@ -293,23 +500,30 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, } if (change & MLX5E_PORT_BUFFER_PFC) { + mlx5e_dbg(HW, priv, "%s: requested PFC per priority bitmask: 0x%x\n", + __func__, pfc->pfc_en); err = mlx5e_port_query_priority2buffer(priv->mdev, buffer); if (err) return err; - err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, port_buff_cell_sz, - &port_buffer, &update_buffer); + err = update_buffer_lossy(priv->mdev, max_mtu, pfc->pfc_en, buffer, xoff, + port_buff_cell_sz, &port_buffer, + &update_buffer); if (err) return err; } if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) { update_prio2buffer = true; + for (i = 0; i < MLX5E_MAX_BUFFER; i++) + mlx5e_dbg(HW, priv, "%s: requested to map prio[%d] to buffer %d\n", + __func__, i, prio2buffer[i]); + err = fill_pfc_en(priv->mdev, &curr_pfc_en); if (err) return err; - err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, xoff, + err = update_buffer_lossy(priv->mdev, max_mtu, curr_pfc_en, prio2buffer, xoff, port_buff_cell_sz, &port_buffer, &update_buffer); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h index 80af7a5ac604..a6ef118de758 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h @@ -60,6 +60,7 @@ struct mlx5e_bufferx_reg { struct mlx5e_port_buffer { u32 port_buffer_size; u32 spare_buffer_size; + u32 headroom_size; struct mlx5e_bufferx_reg buffer[MLX5E_MAX_BUFFER]; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c index 78af8a3175bf..7758a425bfa8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c @@ -28,7 +28,7 @@ struct mlx5e_flow_meter_aso_obj { int base_id; int total_meters; - unsigned long meters_map[0]; /* must be at the end of this struct */ + unsigned long meters_map[]; /* must be at the end of this struct */ }; struct mlx5e_flow_meters { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index d7c020f72401..88a5aed9d678 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -365,7 +365,7 @@ void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) accel_fs_tcp_destroy_table(fs, i); - kvfree(accel_tcp); + kfree(accel_tcp); mlx5e_fs_set_accel_tcp(fs, NULL); } @@ -377,7 +377,7 @@ int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) if (!MLX5_CAP_FLOWTABLE_NIC_RX(mlx5e_fs_get_mdev(fs), ft_field_support.outer_ip_version)) return -EOPNOTSUPP; - accel_tcp = kvzalloc(sizeof(*accel_tcp), GFP_KERNEL); + accel_tcp = kzalloc(sizeof(*accel_tcp), GFP_KERNEL); if (!accel_tcp) return -ENOMEM; mlx5e_fs_set_accel_tcp(fs, accel_tcp); @@ -397,7 +397,7 @@ int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) err_destroy_tables: while (--i >= 0) accel_fs_tcp_destroy_table(fs, i); - kvfree(accel_tcp); + kfree(accel_tcp); mlx5e_fs_set_accel_tcp(fs, NULL); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index da2184c94203..eb5b09f81dec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB // Copyright (c) 2019 Mellanox Technologies. +#include <linux/debugfs.h> #include "en.h" #include "lib/mlx5.h" #include "en_accel/ktls.h" @@ -177,6 +178,15 @@ void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv) destroy_workqueue(priv->tls->rx_wq); } +static void mlx5e_tls_debugfs_init(struct mlx5e_tls *tls, + struct dentry *dfs_root) +{ + if (IS_ERR_OR_NULL(dfs_root)) + return; + + tls->debugfs.dfs = debugfs_create_dir("tls", dfs_root); +} + int mlx5e_ktls_init(struct mlx5e_priv *priv) { struct mlx5e_tls *tls; @@ -189,11 +199,23 @@ int mlx5e_ktls_init(struct mlx5e_priv *priv) return -ENOMEM; priv->tls = tls; + priv->tls->mdev = priv->mdev; + + mlx5e_tls_debugfs_init(tls, priv->dfs_root); + return 0; } void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) { + struct mlx5e_tls *tls = priv->tls; + + if (!mlx5e_is_ktls_device(priv->mdev)) + return; + + debugfs_remove_recursive(tls->debugfs.dfs); + tls->debugfs.dfs = NULL; + kfree(priv->tls); priv->tls = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index 1c35045e41fb..fccf995ee16d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -4,6 +4,7 @@ #ifndef __MLX5E_KTLS_H__ #define __MLX5E_KTLS_H__ +#include <linux/debugfs.h> #include <linux/tls.h> #include <net/tls.h> #include "en.h" @@ -72,10 +73,17 @@ struct mlx5e_tls_sw_stats { atomic64_t rx_tls_del; }; +struct mlx5e_tls_debugfs { + struct dentry *dfs; + struct dentry *dfs_tx; +}; + struct mlx5e_tls { + struct mlx5_core_dev *mdev; struct mlx5e_tls_sw_stats sw_stats; struct workqueue_struct *rx_wq; struct mlx5e_tls_tx_pool *tx_pool; + struct mlx5e_tls_debugfs debugfs; }; int mlx5e_ktls_init(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 78072bf93f3f..6db27062b765 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB // Copyright (c) 2019 Mellanox Technologies. +#include <linux/debugfs.h> #include "en_accel/ktls.h" #include "en_accel/ktls_txrx.h" #include "en_accel/ktls_utils.h" @@ -886,8 +887,24 @@ err_out: return false; } +static void mlx5e_tls_tx_debugfs_init(struct mlx5e_tls *tls, + struct dentry *dfs_root) +{ + if (IS_ERR_OR_NULL(dfs_root)) + return; + + tls->debugfs.dfs_tx = debugfs_create_dir("tx", dfs_root); + if (!tls->debugfs.dfs_tx) + return; + + debugfs_create_size_t("pool_size", 0400, tls->debugfs.dfs_tx, + &tls->tx_pool->size); +} + int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) { + struct mlx5e_tls *tls = priv->tls; + if (!mlx5e_is_ktls_tx(priv->mdev)) return 0; @@ -895,6 +912,8 @@ int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) if (!priv->tls->tx_pool) return -ENOMEM; + mlx5e_tls_tx_debugfs_init(tls, tls->debugfs.dfs); + return 0; } @@ -903,6 +922,9 @@ void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) if (!mlx5e_is_ktls_tx(priv->mdev)) return; + debugfs_remove_recursive(priv->tls->debugfs.dfs_tx); + priv->tls->debugfs.dfs_tx = NULL; + mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool); priv->tls->tx_pool = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 1892ccb889b3..7298fe782e9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include <linux/debugfs.h> #include <linux/list.h> #include <linux/ip.h> #include <linux/ipv6.h> @@ -67,6 +68,7 @@ struct mlx5e_flow_steering { struct mlx5e_fs_udp *udp; struct mlx5e_fs_any *any; struct mlx5e_ptp_fs *ptp_fs; + struct dentry *dfs_root; }; static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs, @@ -104,6 +106,11 @@ static inline int mlx5e_hash_l2(const u8 *addr) return addr[5]; } +struct dentry *mlx5e_fs_get_debugfs_root(struct mlx5e_flow_steering *fs) +{ + return fs->dfs_root; +} + static void mlx5e_add_l2_to_hash(struct hlist_head *hash, const u8 *addr) { struct mlx5e_l2_hash_node *hn; @@ -1429,9 +1436,19 @@ static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs) static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs) { } #endif +static void mlx5e_fs_debugfs_init(struct mlx5e_flow_steering *fs, + struct dentry *dfs_root) +{ + if (IS_ERR_OR_NULL(dfs_root)) + return; + + fs->dfs_root = debugfs_create_dir("fs", dfs_root); +} + struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile, struct mlx5_core_dev *mdev, - bool state_destroy) + bool state_destroy, + struct dentry *dfs_root) { struct mlx5e_flow_steering *fs; int err; @@ -1458,6 +1475,8 @@ struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile, if (err) goto err_free_tc; + mlx5e_fs_debugfs_init(fs, dfs_root); + return fs; err_free_tc: mlx5e_fs_tc_free(fs); @@ -1471,6 +1490,7 @@ err: void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs) { + debugfs_remove_recursive(fs->dfs_root); mlx5e_fs_ethtool_free(fs); mlx5e_fs_tc_free(fs); mlx5e_fs_vlan_free(fs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index abcc614b6191..6bb0fdaa5efa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -35,6 +35,7 @@ #include <net/vxlan.h> #include <net/geneve.h> #include <linux/bpf.h> +#include <linux/debugfs.h> #include <linux/if_bridge.h> #include <linux/filter.h> #include <net/page_pool.h> @@ -5233,7 +5234,8 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, mlx5e_timestamp_init(priv); fs = mlx5e_fs_init(priv->profile, mdev, - !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + !test_bit(MLX5E_STATE_DESTROYING, &priv->state), + priv->dfs_root); if (!fs) { err = -ENOMEM; mlx5_core_err(mdev, "FS initialization failed, %d\n", err); @@ -5934,6 +5936,9 @@ static int mlx5e_probe(struct auxiliary_device *adev, priv->profile = profile; priv->ppriv = NULL; + priv->dfs_root = debugfs_create_dir("nic", + mlx5_debugfs_get_dev_root(priv->mdev)); + err = mlx5e_devlink_port_register(priv); if (err) { mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); @@ -5971,6 +5976,7 @@ err_profile_cleanup: err_devlink_cleanup: mlx5e_devlink_port_unregister(priv); err_destroy_netdev: + debugfs_remove_recursive(priv->dfs_root); mlx5e_destroy_netdev(priv); return err; } @@ -5985,6 +5991,7 @@ static void mlx5e_remove(struct auxiliary_device *adev) mlx5e_suspend(adev, state); priv->profile->cleanup(priv); mlx5e_devlink_port_unregister(priv); + debugfs_remove_recursive(priv->dfs_root); mlx5e_destroy_netdev(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 7d90e5b72854..8d29310c7e48 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -789,8 +789,10 @@ static int mlx5e_init_rep(struct mlx5_core_dev *mdev, { struct mlx5e_priv *priv = netdev_priv(netdev); - priv->fs = mlx5e_fs_init(priv->profile, mdev, - !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + priv->fs = + mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state), + priv->dfs_root); if (!priv->fs) { netdev_err(priv->netdev, "FS allocation failed\n"); return -ENOMEM; @@ -808,7 +810,8 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv = netdev_priv(netdev); priv->fs = mlx5e_fs_init(priv->profile, mdev, - !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + !test_bit(MLX5E_STATE_DESTROYING, &priv->state), + priv->dfs_root); if (!priv->fs) { netdev_err(priv->netdev, "FS allocation failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index dbadaf166487..0c04a5e7c274 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -71,6 +71,12 @@ #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) +struct mlx5e_hairpin_params { + struct mlx5_core_dev *mdev; + u32 num_queues; + u32 queue_size; +}; + struct mlx5e_tc_table { /* Protects the dynamic assignment of the t parameter * which is the nic tc root table. @@ -93,6 +99,8 @@ struct mlx5e_tc_table { struct mlx5_tc_ct_priv *ct; struct mapping_ctx *mapping; + struct mlx5e_hairpin_params hairpin_params; + struct dentry *dfs_root; }; struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { @@ -1016,6 +1024,138 @@ static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, return 0; } +static int debugfs_hairpin_queues_set(void *data, u64 val) +{ + struct mlx5e_hairpin_params *hp = data; + + if (!val) { + mlx5_core_err(hp->mdev, + "Number of hairpin queues must be > 0\n"); + return -EINVAL; + } + + hp->num_queues = val; + + return 0; +} + +static int debugfs_hairpin_queues_get(void *data, u64 *val) +{ + struct mlx5e_hairpin_params *hp = data; + + *val = hp->num_queues; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_hairpin_queues, debugfs_hairpin_queues_get, + debugfs_hairpin_queues_set, "%llu\n"); + +static int debugfs_hairpin_queue_size_set(void *data, u64 val) +{ + struct mlx5e_hairpin_params *hp = data; + + if (val > BIT(MLX5_CAP_GEN(hp->mdev, log_max_hairpin_num_packets))) { + mlx5_core_err(hp->mdev, + "Invalid hairpin queue size, must be <= %lu\n", + BIT(MLX5_CAP_GEN(hp->mdev, + log_max_hairpin_num_packets))); + return -EINVAL; + } + + hp->queue_size = roundup_pow_of_two(val); + + return 0; +} + +static int debugfs_hairpin_queue_size_get(void *data, u64 *val) +{ + struct mlx5e_hairpin_params *hp = data; + + *val = hp->queue_size; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_hairpin_queue_size, + debugfs_hairpin_queue_size_get, + debugfs_hairpin_queue_size_set, "%llu\n"); + +static int debugfs_hairpin_num_active_get(void *data, u64 *val) +{ + struct mlx5e_tc_table *tc = data; + struct mlx5e_hairpin_entry *hpe; + u32 cnt = 0; + u32 bkt; + + mutex_lock(&tc->hairpin_tbl_lock); + hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist) + cnt++; + mutex_unlock(&tc->hairpin_tbl_lock); + + *val = cnt; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_hairpin_num_active, + debugfs_hairpin_num_active_get, NULL, "%llu\n"); + +static int debugfs_hairpin_table_dump_show(struct seq_file *file, void *priv) + +{ + struct mlx5e_tc_table *tc = file->private; + struct mlx5e_hairpin_entry *hpe; + u32 bkt; + + mutex_lock(&tc->hairpin_tbl_lock); + hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist) + seq_printf(file, "Hairpin peer_vhca_id %u prio %u refcnt %u\n", + hpe->peer_vhca_id, hpe->prio, + refcount_read(&hpe->refcnt)); + mutex_unlock(&tc->hairpin_tbl_lock); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(debugfs_hairpin_table_dump); + +static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc, + struct dentry *dfs_root) +{ + if (IS_ERR_OR_NULL(dfs_root)) + return; + + tc->dfs_root = debugfs_create_dir("tc", dfs_root); + if (!tc->dfs_root) + return; + + debugfs_create_file("hairpin_num_queues", 0644, tc->dfs_root, + &tc->hairpin_params, &fops_hairpin_queues); + debugfs_create_file("hairpin_queue_size", 0644, tc->dfs_root, + &tc->hairpin_params, &fops_hairpin_queue_size); + debugfs_create_file("hairpin_num_active", 0444, tc->dfs_root, tc, + &fops_hairpin_num_active); + debugfs_create_file("hairpin_table_dump", 0444, tc->dfs_root, tc, + &debugfs_hairpin_table_dump_fops); +} + +static void +mlx5e_hairpin_params_init(struct mlx5e_hairpin_params *hairpin_params, + struct mlx5_core_dev *mdev) +{ + u64 link_speed64; + u32 link_speed; + + hairpin_params->mdev = mdev; + /* set hairpin pair per each 50Gbs share of the link */ + mlx5e_port_max_linkspeed(mdev, &link_speed); + link_speed = max_t(u32, link_speed, 50000); + link_speed64 = link_speed; + do_div(link_speed64, 50000); + hairpin_params->num_queues = link_speed64; + + hairpin_params->queue_size = + BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), + MLX5_CAP_GEN(mdev, log_max_hairpin_num_packets))); +} + static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5e_tc_flow_parse_attr *parse_attr, @@ -1027,8 +1167,6 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, struct mlx5_core_dev *peer_mdev; struct mlx5e_hairpin_entry *hpe; struct mlx5e_hairpin *hp; - u64 link_speed64; - u32 link_speed; u8 match_prio; u16 peer_id; int err; @@ -1081,21 +1219,16 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, hash_hairpin_info(peer_id, match_prio)); mutex_unlock(&tc->hairpin_tbl_lock); - params.log_data_size = clamp_t(u8, 16, - MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz), - MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz)); - params.log_num_packets = params.log_data_size - - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev); - params.log_num_packets = min_t(u8, params.log_num_packets, - MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets)); + params.log_num_packets = ilog2(tc->hairpin_params.queue_size); + params.log_data_size = + clamp_t(u32, + params.log_num_packets + + MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev), + MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz), + MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz)); params.q_counter = priv->q_counter; - /* set hairpin pair per each 50Gbs share of the link */ - mlx5e_port_max_linkspeed(priv->mdev, &link_speed); - link_speed = max_t(u32, link_speed, 50000); - link_speed64 = link_speed; - do_div(link_speed64, 50000); - params.num_channels = link_speed64; + params.num_channels = tc->hairpin_params.num_queues; hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex); hpe->hp = hp; @@ -5218,6 +5351,8 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr, MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act); + mlx5e_hairpin_params_init(&tc->hairpin_params, dev); + tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; err = register_netdevice_notifier_dev_net(priv->netdev, &tc->netdevice_nb, @@ -5228,6 +5363,8 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) goto err_reg; } + mlx5e_tc_debugfs_init(tc, mlx5e_fs_get_debugfs_root(priv->fs)); + return 0; err_reg: @@ -5256,6 +5393,8 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + debugfs_remove_recursive(tc->dfs_root); + if (tc->netdevice_nb.notifier_call) unregister_netdevice_notifier_dev_net(priv->netdev, &tc->netdevice_nb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 0dfd5742c6fe..bbb6dab3b21f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1488,7 +1488,7 @@ int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 * void *hca_caps; int err; - if (!mlx5_core_is_ecpf(dev)) { + if (!mlx5_core_is_ecpf(dev) || mlx5_core_is_management_pf(dev)) { *max_sfs = 0; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 911cf4d23964..c2a4f86bc890 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -412,7 +412,8 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) int err; priv->fs = mlx5e_fs_init(priv->profile, mdev, - !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + !test_bit(MLX5E_STATE_DESTROYING, &priv->state), + priv->dfs_root); if (!priv->fs) { netdev_err(priv->netdev, "FS allocation failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index a4476cb4c3b3..fd2d31cdbcf9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -724,7 +724,6 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, struct mlx5dr_action *action) { struct postsend_info send_info = {}; - int ret; send_info.write.addr = (uintptr_t)action->rewrite->data; send_info.write.length = action->rewrite->num_of_actions * @@ -734,9 +733,7 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk); send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk); - ret = dr_postsend_icm_data(dmn, &send_info); - - return ret; + return dr_postsend_icm_data(dmn, &send_info); } static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev, diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 0848b5529d48..2bf18748581d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -831,7 +831,7 @@ static int qed_iov_enable_vf_access(struct qed_hwfn *p_hwfn, * @p_hwfn: HW device data. * @p_ptt: PTT window for writing the registers. * @vf: VF info data. - * @enable: The actual permision for this VF. + * @enable: The actual permission for this VF. * * In E4, queue zone permission table size is 320x9. There * are 320 VF queues for single engine device (256 for dual diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index dadd61bccfe7..c7ddcb829535 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -576,6 +576,7 @@ struct rtl8169_tc_offsets { enum rtl_flag { RTL_FLAG_TASK_ENABLED = 0, RTL_FLAG_TASK_RESET_PENDING, + RTL_FLAG_TASK_TX_TIMEOUT, RTL_FLAG_MAX }; @@ -3928,7 +3929,7 @@ static void rtl8169_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct rtl8169_private *tp = netdev_priv(dev); - rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); + rtl_schedule_task(tp, RTL_FLAG_TASK_TX_TIMEOUT); } static int rtl8169_tx_map(struct rtl8169_private *tp, const u32 *opts, u32 len, @@ -4522,6 +4523,7 @@ static void rtl_task(struct work_struct *work) { struct rtl8169_private *tp = container_of(work, struct rtl8169_private, wk.work); + int ret; rtnl_lock(); @@ -4529,7 +4531,17 @@ static void rtl_task(struct work_struct *work) !test_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags)) goto out_unlock; + if (test_and_clear_bit(RTL_FLAG_TASK_TX_TIMEOUT, tp->wk.flags)) { + /* ASPM compatibility issues are a typical reason for tx timeouts */ + ret = pci_disable_link_state(tp->pci_dev, PCIE_LINK_STATE_L1 | + PCIE_LINK_STATE_L0S); + if (!ret) + netdev_warn_once(tp->dev, "ASPM disabled on Tx timeout\n"); + goto reset; + } + if (test_and_clear_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags)) { +reset: rtl_reset_work(tp); netif_wake_queue(tp->dev); } diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 71a499113308..ed17163d7811 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3044,23 +3044,46 @@ static int sh_mdio_release(struct sh_eth_private *mdp) return 0; } -static int sh_mdiobb_read(struct mii_bus *bus, int phy, int reg) +static int sh_mdiobb_read_c22(struct mii_bus *bus, int phy, int reg) { int res; pm_runtime_get_sync(bus->parent); - res = mdiobb_read(bus, phy, reg); + res = mdiobb_read_c22(bus, phy, reg); pm_runtime_put(bus->parent); return res; } -static int sh_mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val) +static int sh_mdiobb_write_c22(struct mii_bus *bus, int phy, int reg, u16 val) { int res; pm_runtime_get_sync(bus->parent); - res = mdiobb_write(bus, phy, reg, val); + res = mdiobb_write_c22(bus, phy, reg, val); + pm_runtime_put(bus->parent); + + return res; +} + +static int sh_mdiobb_read_c45(struct mii_bus *bus, int phy, int devad, int reg) +{ + int res; + + pm_runtime_get_sync(bus->parent); + res = mdiobb_read_c45(bus, phy, devad, reg); + pm_runtime_put(bus->parent); + + return res; +} + +static int sh_mdiobb_write_c45(struct mii_bus *bus, int phy, int devad, + int reg, u16 val) +{ + int res; + + pm_runtime_get_sync(bus->parent); + res = mdiobb_write_c45(bus, phy, devad, reg, val); pm_runtime_put(bus->parent); return res; @@ -3091,8 +3114,10 @@ static int sh_mdio_init(struct sh_eth_private *mdp, return -ENOMEM; /* Wrap accessors with Runtime PM-aware ops */ - mdp->mii_bus->read = sh_mdiobb_read; - mdp->mii_bus->write = sh_mdiobb_write; + mdp->mii_bus->read = sh_mdiobb_read_c22; + mdp->mii_bus->write = sh_mdiobb_write_c22; + mdp->mii_bus->read_c45 = sh_mdiobb_read_c45; + mdp->mii_bus->write_c45 = sh_mdiobb_write_c45; /* Hook up MII support for ethtool */ mdp->mii_bus->name = "sh_mii"; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index ecbde83b5243..5cac98284184 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1416,6 +1416,68 @@ static const struct net_device_ops am65_cpsw_nuss_netdev_ops = { .ndo_setup_tc = am65_cpsw_qos_ndo_setup_tc, }; +static void am65_cpsw_disable_phy(struct phy *phy) +{ + phy_power_off(phy); + phy_exit(phy); +} + +static int am65_cpsw_enable_phy(struct phy *phy) +{ + int ret; + + ret = phy_init(phy); + if (ret < 0) + return ret; + + ret = phy_power_on(phy); + if (ret < 0) { + phy_exit(phy); + return ret; + } + + return 0; +} + +static void am65_cpsw_disable_serdes_phy(struct am65_cpsw_common *common) +{ + struct am65_cpsw_port *port; + struct phy *phy; + int i; + + for (i = 0; i < common->port_num; i++) { + port = &common->ports[i]; + phy = port->slave.serdes_phy; + if (phy) + am65_cpsw_disable_phy(phy); + } +} + +static int am65_cpsw_init_serdes_phy(struct device *dev, struct device_node *port_np, + struct am65_cpsw_port *port) +{ + const char *name = "serdes-phy"; + struct phy *phy; + int ret; + + phy = devm_of_phy_get(dev, port_np, name); + if (PTR_ERR(phy) == -ENODEV) + return 0; + + /* Serdes PHY exists. Store it. */ + port->slave.serdes_phy = phy; + + ret = am65_cpsw_enable_phy(phy); + if (ret < 0) + goto err_phy; + + return 0; + +err_phy: + devm_phy_put(dev, phy); + return ret; +} + static void am65_cpsw_nuss_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { @@ -1959,6 +2021,11 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) goto of_node_put; } + /* Initialize the Serdes PHY for the port */ + ret = am65_cpsw_init_serdes_phy(dev, port_np, port); + if (ret) + return ret; + port->slave.mac_only = of_property_read_bool(port_np, "ti,mac-only"); @@ -2684,11 +2751,19 @@ static const struct am65_cpsw_pdata j7200_cpswxg_pdata = { .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII), }; +static const struct am65_cpsw_pdata j721e_cpswxg_pdata = { + .quirks = 0, + .ale_dev_id = "am64-cpswxg", + .fdqring_mode = K3_RINGACC_RING_MODE_MESSAGE, + .extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII), +}; + static const struct of_device_id am65_cpsw_nuss_of_mtable[] = { { .compatible = "ti,am654-cpsw-nuss", .data = &am65x_sr1_0}, { .compatible = "ti,j721e-cpsw-nuss", .data = &j721e_pdata}, { .compatible = "ti,am642-cpsw-nuss", .data = &am64x_cpswxg_pdata}, { .compatible = "ti,j7200-cpswxg-nuss", .data = &j7200_cpswxg_pdata}, + { .compatible = "ti,j721e-cpswxg-nuss", .data = &j721e_cpswxg_pdata}, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, am65_cpsw_nuss_of_mtable); @@ -2870,6 +2945,7 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev) */ am65_cpsw_nuss_cleanup_ndev(common); am65_cpsw_nuss_phylink_cleanup(common); + am65_cpsw_disable_serdes_phy(common); of_platform_device_destroy(common->mdio_dev, NULL); diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index 4b75620f8d28..ed26768a6e51 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -32,6 +32,7 @@ struct am65_cpsw_slave_data { struct device_node *phy_node; phy_interface_t phy_if; struct phy *ifphy; + struct phy *serdes_phy; bool rx_pause; bool tx_pause; u8 mac_addr[ETH_ALEN]; diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index 946b9753ccfb..23169e36a3d4 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -225,7 +225,7 @@ static int davinci_get_mdio_data(struct mdiobb_ctrl *ctrl) return test_bit(MDIO_PIN, ®); } -static int davinci_mdiobb_read(struct mii_bus *bus, int phy, int reg) +static int davinci_mdiobb_read_c22(struct mii_bus *bus, int phy, int reg) { int ret; @@ -233,7 +233,7 @@ static int davinci_mdiobb_read(struct mii_bus *bus, int phy, int reg) if (ret < 0) return ret; - ret = mdiobb_read(bus, phy, reg); + ret = mdiobb_read_c22(bus, phy, reg); pm_runtime_mark_last_busy(bus->parent); pm_runtime_put_autosuspend(bus->parent); @@ -241,8 +241,8 @@ static int davinci_mdiobb_read(struct mii_bus *bus, int phy, int reg) return ret; } -static int davinci_mdiobb_write(struct mii_bus *bus, int phy, int reg, - u16 val) +static int davinci_mdiobb_write_c22(struct mii_bus *bus, int phy, int reg, + u16 val) { int ret; @@ -250,7 +250,41 @@ static int davinci_mdiobb_write(struct mii_bus *bus, int phy, int reg, if (ret < 0) return ret; - ret = mdiobb_write(bus, phy, reg, val); + ret = mdiobb_write_c22(bus, phy, reg, val); + + pm_runtime_mark_last_busy(bus->parent); + pm_runtime_put_autosuspend(bus->parent); + + return ret; +} + +static int davinci_mdiobb_read_c45(struct mii_bus *bus, int phy, int devad, + int reg) +{ + int ret; + + ret = pm_runtime_resume_and_get(bus->parent); + if (ret < 0) + return ret; + + ret = mdiobb_read_c45(bus, phy, devad, reg); + + pm_runtime_mark_last_busy(bus->parent); + pm_runtime_put_autosuspend(bus->parent); + + return ret; +} + +static int davinci_mdiobb_write_c45(struct mii_bus *bus, int phy, int devad, + int reg, u16 val) +{ + int ret; + + ret = pm_runtime_resume_and_get(bus->parent); + if (ret < 0) + return ret; + + ret = mdiobb_write_c45(bus, phy, devad, reg, val); pm_runtime_mark_last_busy(bus->parent); pm_runtime_put_autosuspend(bus->parent); @@ -573,8 +607,10 @@ static int davinci_mdio_probe(struct platform_device *pdev) data->bus->name = dev_name(dev); if (data->manual_mode) { - data->bus->read = davinci_mdiobb_read; - data->bus->write = davinci_mdiobb_write; + data->bus->read = davinci_mdiobb_read_c22; + data->bus->write = davinci_mdiobb_write_c22; + data->bus->read_c45 = davinci_mdiobb_read_c45; + data->bus->write_c45 = davinci_mdiobb_write_c45; data->bus->reset = davinci_mdiobb_reset; dev_info(dev, "Configuring MDIO in manual mode\n"); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index c57dc3238b3f..786e1090cf84 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -2,6 +2,7 @@ /* Copyright (c) 2015 - 2022 Beijing WangXun Technology Co., Ltd. */ #include <linux/etherdevice.h> +#include <linux/netdevice.h> #include <linux/if_ether.h> #include <linux/iopoll.h> #include <linux/pci.h> @@ -9,18 +10,18 @@ #include "wx_type.h" #include "wx_hw.h" -static void wx_intr_disable(struct wx_hw *wxhw, u64 qmask) +static void wx_intr_disable(struct wx *wx, u64 qmask) { u32 mask; mask = (qmask & 0xFFFFFFFF); if (mask) - wr32(wxhw, WX_PX_IMS(0), mask); + wr32(wx, WX_PX_IMS(0), mask); - if (wxhw->mac.type == wx_mac_sp) { + if (wx->mac.type == wx_mac_sp) { mask = (qmask >> 32); if (mask) - wr32(wxhw, WX_PX_IMS(1), mask); + wr32(wx, WX_PX_IMS(1), mask); } } @@ -28,33 +29,33 @@ static void wx_intr_disable(struct wx_hw *wxhw, u64 qmask) * 1. to be sector address, when implemented erase sector command * 2. to be flash address when implemented read, write flash address */ -static int wx_fmgr_cmd_op(struct wx_hw *wxhw, u32 cmd, u32 cmd_addr) +static int wx_fmgr_cmd_op(struct wx *wx, u32 cmd, u32 cmd_addr) { u32 cmd_val = 0, val = 0; cmd_val = WX_SPI_CMD_CMD(cmd) | WX_SPI_CMD_CLK(WX_SPI_CLK_DIV) | cmd_addr; - wr32(wxhw, WX_SPI_CMD, cmd_val); + wr32(wx, WX_SPI_CMD, cmd_val); return read_poll_timeout(rd32, val, (val & 0x1), 10, 100000, - false, wxhw, WX_SPI_STATUS); + false, wx, WX_SPI_STATUS); } -static int wx_flash_read_dword(struct wx_hw *wxhw, u32 addr, u32 *data) +static int wx_flash_read_dword(struct wx *wx, u32 addr, u32 *data) { int ret = 0; - ret = wx_fmgr_cmd_op(wxhw, WX_SPI_CMD_READ_DWORD, addr); + ret = wx_fmgr_cmd_op(wx, WX_SPI_CMD_READ_DWORD, addr); if (ret < 0) return ret; - *data = rd32(wxhw, WX_SPI_DATA); + *data = rd32(wx, WX_SPI_DATA); return ret; } -int wx_check_flash_load(struct wx_hw *hw, u32 check_bit) +int wx_check_flash_load(struct wx *hw, u32 check_bit) { u32 reg = 0; int err = 0; @@ -73,15 +74,15 @@ int wx_check_flash_load(struct wx_hw *hw, u32 check_bit) } EXPORT_SYMBOL(wx_check_flash_load); -void wx_control_hw(struct wx_hw *wxhw, bool drv) +void wx_control_hw(struct wx *wx, bool drv) { if (drv) { /* Let firmware know the driver has taken over */ - wr32m(wxhw, WX_CFG_PORT_CTL, + wr32m(wx, WX_CFG_PORT_CTL, WX_CFG_PORT_CTL_DRV_LOAD, WX_CFG_PORT_CTL_DRV_LOAD); } else { /* Let firmware take over control of hw */ - wr32m(wxhw, WX_CFG_PORT_CTL, + wr32m(wx, WX_CFG_PORT_CTL, WX_CFG_PORT_CTL_DRV_LOAD, 0); } } @@ -89,13 +90,13 @@ EXPORT_SYMBOL(wx_control_hw); /** * wx_mng_present - returns 0 when management capability is present - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure */ -int wx_mng_present(struct wx_hw *wxhw) +int wx_mng_present(struct wx *wx) { u32 fwsm; - fwsm = rd32(wxhw, WX_MIS_ST); + fwsm = rd32(wx, WX_MIS_ST); if (fwsm & WX_MIS_ST_MNG_INIT_DN) return 0; else @@ -108,40 +109,40 @@ static DEFINE_MUTEX(wx_sw_sync_lock); /** * wx_release_sw_sync - Release SW semaphore - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure * @mask: Mask to specify which semaphore to release * * Releases the SW semaphore for the specified * function (CSR, PHY0, PHY1, EEPROM, Flash) **/ -static void wx_release_sw_sync(struct wx_hw *wxhw, u32 mask) +static void wx_release_sw_sync(struct wx *wx, u32 mask) { mutex_lock(&wx_sw_sync_lock); - wr32m(wxhw, WX_MNG_SWFW_SYNC, mask, 0); + wr32m(wx, WX_MNG_SWFW_SYNC, mask, 0); mutex_unlock(&wx_sw_sync_lock); } /** * wx_acquire_sw_sync - Acquire SW semaphore - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure * @mask: Mask to specify which semaphore to acquire * * Acquires the SW semaphore for the specified * function (CSR, PHY0, PHY1, EEPROM, Flash) **/ -static int wx_acquire_sw_sync(struct wx_hw *wxhw, u32 mask) +static int wx_acquire_sw_sync(struct wx *wx, u32 mask) { u32 sem = 0; int ret = 0; mutex_lock(&wx_sw_sync_lock); ret = read_poll_timeout(rd32, sem, !(sem & mask), - 5000, 2000000, false, wxhw, WX_MNG_SWFW_SYNC); + 5000, 2000000, false, wx, WX_MNG_SWFW_SYNC); if (!ret) { sem |= mask; - wr32(wxhw, WX_MNG_SWFW_SYNC, sem); + wr32(wx, WX_MNG_SWFW_SYNC, sem); } else { - wx_err(wxhw, "SW Semaphore not granted: 0x%x.\n", sem); + wx_err(wx, "SW Semaphore not granted: 0x%x.\n", sem); } mutex_unlock(&wx_sw_sync_lock); @@ -150,7 +151,7 @@ static int wx_acquire_sw_sync(struct wx_hw *wxhw, u32 mask) /** * wx_host_interface_command - Issue command to manageability block - * @wxhw: pointer to the HW structure + * @wx: pointer to the HW structure * @buffer: contains the command to write and where the return status will * be placed * @length: length of buffer, must be multiple of 4 bytes @@ -162,7 +163,7 @@ static int wx_acquire_sw_sync(struct wx_hw *wxhw, u32 mask) * So we will leave this up to the caller to read back the data * in these cases. **/ -int wx_host_interface_command(struct wx_hw *wxhw, u32 *buffer, +int wx_host_interface_command(struct wx *wx, u32 *buffer, u32 length, u32 timeout, bool return_data) { u32 hdr_size = sizeof(struct wx_hic_hdr); @@ -172,17 +173,17 @@ int wx_host_interface_command(struct wx_hw *wxhw, u32 *buffer, u16 buf_len; if (length == 0 || length > WX_HI_MAX_BLOCK_BYTE_LENGTH) { - wx_err(wxhw, "Buffer length failure buffersize=%d.\n", length); + wx_err(wx, "Buffer length failure buffersize=%d.\n", length); return -EINVAL; } - status = wx_acquire_sw_sync(wxhw, WX_MNG_SWFW_SYNC_SW_MB); + status = wx_acquire_sw_sync(wx, WX_MNG_SWFW_SYNC_SW_MB); if (status != 0) return status; /* Calculate length in DWORDs. We must be DWORD aligned */ if ((length % (sizeof(u32))) != 0) { - wx_err(wxhw, "Buffer length failure, not aligned to dword"); + wx_err(wx, "Buffer length failure, not aligned to dword"); status = -EINVAL; goto rel_out; } @@ -193,38 +194,38 @@ int wx_host_interface_command(struct wx_hw *wxhw, u32 *buffer, * into the ram area. */ for (i = 0; i < dword_len; i++) { - wr32a(wxhw, WX_MNG_MBOX, i, (__force u32)cpu_to_le32(buffer[i])); + wr32a(wx, WX_MNG_MBOX, i, (__force u32)cpu_to_le32(buffer[i])); /* write flush */ - buf[i] = rd32a(wxhw, WX_MNG_MBOX, i); + buf[i] = rd32a(wx, WX_MNG_MBOX, i); } /* Setting this bit tells the ARC that a new command is pending. */ - wr32m(wxhw, WX_MNG_MBOX_CTL, + wr32m(wx, WX_MNG_MBOX_CTL, WX_MNG_MBOX_CTL_SWRDY, WX_MNG_MBOX_CTL_SWRDY); status = read_poll_timeout(rd32, hicr, hicr & WX_MNG_MBOX_CTL_FWRDY, 1000, - timeout * 1000, false, wxhw, WX_MNG_MBOX_CTL); + timeout * 1000, false, wx, WX_MNG_MBOX_CTL); /* Check command completion */ if (status) { - wx_dbg(wxhw, "Command has failed with no status valid.\n"); + wx_dbg(wx, "Command has failed with no status valid.\n"); - buf[0] = rd32(wxhw, WX_MNG_MBOX); + buf[0] = rd32(wx, WX_MNG_MBOX); if ((buffer[0] & 0xff) != (~buf[0] >> 24)) { status = -EINVAL; goto rel_out; } if ((buf[0] & 0xff0000) >> 16 == 0x80) { - wx_dbg(wxhw, "It's unknown cmd.\n"); + wx_dbg(wx, "It's unknown cmd.\n"); status = -EINVAL; goto rel_out; } - wx_dbg(wxhw, "write value:\n"); + wx_dbg(wx, "write value:\n"); for (i = 0; i < dword_len; i++) - wx_dbg(wxhw, "%x ", buffer[i]); - wx_dbg(wxhw, "read value:\n"); + wx_dbg(wx, "%x ", buffer[i]); + wx_dbg(wx, "read value:\n"); for (i = 0; i < dword_len; i++) - wx_dbg(wxhw, "%x ", buf[i]); + wx_dbg(wx, "%x ", buf[i]); } if (!return_data) @@ -235,7 +236,7 @@ int wx_host_interface_command(struct wx_hw *wxhw, u32 *buffer, /* first pull in the header so we know the buffer length */ for (bi = 0; bi < dword_len; bi++) { - buffer[bi] = rd32a(wxhw, WX_MNG_MBOX, bi); + buffer[bi] = rd32a(wx, WX_MNG_MBOX, bi); le32_to_cpus(&buffer[bi]); } @@ -245,7 +246,7 @@ int wx_host_interface_command(struct wx_hw *wxhw, u32 *buffer, goto rel_out; if (length < buf_len + hdr_size) { - wx_err(wxhw, "Buffer not large enough for reply message.\n"); + wx_err(wx, "Buffer not large enough for reply message.\n"); status = -EFAULT; goto rel_out; } @@ -255,12 +256,12 @@ int wx_host_interface_command(struct wx_hw *wxhw, u32 *buffer, /* Pull in the rest of the buffer (bi is where we left off) */ for (; bi <= dword_len; bi++) { - buffer[bi] = rd32a(wxhw, WX_MNG_MBOX, bi); + buffer[bi] = rd32a(wx, WX_MNG_MBOX, bi); le32_to_cpus(&buffer[bi]); } rel_out: - wx_release_sw_sync(wxhw, WX_MNG_SWFW_SYNC_SW_MB); + wx_release_sw_sync(wx, WX_MNG_SWFW_SYNC_SW_MB); return status; } EXPORT_SYMBOL(wx_host_interface_command); @@ -268,13 +269,13 @@ EXPORT_SYMBOL(wx_host_interface_command); /** * wx_read_ee_hostif_data - Read EEPROM word using a host interface cmd * assuming that the semaphore is already obtained. - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure * @offset: offset of word in the EEPROM to read * @data: word read from the EEPROM * * Reads a 16 bit word from the EEPROM using the hostif. **/ -static int wx_read_ee_hostif_data(struct wx_hw *wxhw, u16 offset, u16 *data) +static int wx_read_ee_hostif_data(struct wx *wx, u16 offset, u16 *data) { struct wx_hic_read_shadow_ram buffer; int status; @@ -289,33 +290,33 @@ static int wx_read_ee_hostif_data(struct wx_hw *wxhw, u16 offset, u16 *data) /* one word */ buffer.length = (__force u16)cpu_to_be16(sizeof(u16)); - status = wx_host_interface_command(wxhw, (u32 *)&buffer, sizeof(buffer), + status = wx_host_interface_command(wx, (u32 *)&buffer, sizeof(buffer), WX_HI_COMMAND_TIMEOUT, false); if (status != 0) return status; - *data = (u16)rd32a(wxhw, WX_MNG_MBOX, FW_NVM_DATA_OFFSET); + *data = (u16)rd32a(wx, WX_MNG_MBOX, FW_NVM_DATA_OFFSET); return status; } /** * wx_read_ee_hostif - Read EEPROM word using a host interface cmd - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure * @offset: offset of word in the EEPROM to read * @data: word read from the EEPROM * * Reads a 16 bit word from the EEPROM using the hostif. **/ -int wx_read_ee_hostif(struct wx_hw *wxhw, u16 offset, u16 *data) +int wx_read_ee_hostif(struct wx *wx, u16 offset, u16 *data) { int status = 0; - status = wx_acquire_sw_sync(wxhw, WX_MNG_SWFW_SYNC_SW_FLASH); + status = wx_acquire_sw_sync(wx, WX_MNG_SWFW_SYNC_SW_FLASH); if (status == 0) { - status = wx_read_ee_hostif_data(wxhw, offset, data); - wx_release_sw_sync(wxhw, WX_MNG_SWFW_SYNC_SW_FLASH); + status = wx_read_ee_hostif_data(wx, offset, data); + wx_release_sw_sync(wx, WX_MNG_SWFW_SYNC_SW_FLASH); } return status; @@ -324,14 +325,14 @@ EXPORT_SYMBOL(wx_read_ee_hostif); /** * wx_read_ee_hostif_buffer- Read EEPROM word(s) using hostif - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure * @offset: offset of word in the EEPROM to read * @words: number of words * @data: word(s) read from the EEPROM * * Reads a 16 bit word(s) from the EEPROM using the hostif. **/ -int wx_read_ee_hostif_buffer(struct wx_hw *wxhw, +int wx_read_ee_hostif_buffer(struct wx *wx, u16 offset, u16 words, u16 *data) { struct wx_hic_read_shadow_ram buffer; @@ -342,7 +343,7 @@ int wx_read_ee_hostif_buffer(struct wx_hw *wxhw, u32 i; /* Take semaphore for the entire operation. */ - status = wx_acquire_sw_sync(wxhw, WX_MNG_SWFW_SYNC_SW_FLASH); + status = wx_acquire_sw_sync(wx, WX_MNG_SWFW_SYNC_SW_FLASH); if (status != 0) return status; @@ -361,20 +362,20 @@ int wx_read_ee_hostif_buffer(struct wx_hw *wxhw, buffer.address = (__force u32)cpu_to_be32((offset + current_word) * 2); buffer.length = (__force u16)cpu_to_be16(words_to_read * 2); - status = wx_host_interface_command(wxhw, (u32 *)&buffer, + status = wx_host_interface_command(wx, (u32 *)&buffer, sizeof(buffer), WX_HI_COMMAND_TIMEOUT, false); if (status != 0) { - wx_err(wxhw, "Host interface command failed\n"); + wx_err(wx, "Host interface command failed\n"); goto out; } for (i = 0; i < words_to_read; i++) { u32 reg = WX_MNG_MBOX + (FW_NVM_DATA_OFFSET << 2) + 2 * i; - value = rd32(wxhw, reg); + value = rd32(wx, reg); data[current_word] = (u16)(value & 0xffff); current_word++; i++; @@ -388,7 +389,7 @@ int wx_read_ee_hostif_buffer(struct wx_hw *wxhw, } out: - wx_release_sw_sync(wxhw, WX_MNG_SWFW_SYNC_SW_FLASH); + wx_release_sw_sync(wx, WX_MNG_SWFW_SYNC_SW_FLASH); return status; } EXPORT_SYMBOL(wx_read_ee_hostif_buffer); @@ -416,12 +417,12 @@ static u8 wx_calculate_checksum(u8 *buffer, u32 length) /** * wx_reset_hostif - send reset cmd to fw - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure * * Sends reset cmd to firmware through the manageability * block. **/ -int wx_reset_hostif(struct wx_hw *wxhw) +int wx_reset_hostif(struct wx *wx) { struct wx_hic_reset reset_cmd; int ret_val = 0; @@ -430,15 +431,15 @@ int wx_reset_hostif(struct wx_hw *wxhw) reset_cmd.hdr.cmd = FW_RESET_CMD; reset_cmd.hdr.buf_len = FW_RESET_LEN; reset_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED; - reset_cmd.lan_id = wxhw->bus.func; - reset_cmd.reset_type = (u16)wxhw->reset_type; + reset_cmd.lan_id = wx->bus.func; + reset_cmd.reset_type = (u16)wx->reset_type; reset_cmd.hdr.checksum = 0; reset_cmd.hdr.checksum = wx_calculate_checksum((u8 *)&reset_cmd, (FW_CEM_HDR_LEN + reset_cmd.hdr.buf_len)); for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) { - ret_val = wx_host_interface_command(wxhw, (u32 *)&reset_cmd, + ret_val = wx_host_interface_command(wx, (u32 *)&reset_cmd, sizeof(reset_cmd), WX_HI_COMMAND_TIMEOUT, true); @@ -460,14 +461,14 @@ EXPORT_SYMBOL(wx_reset_hostif); /** * wx_init_eeprom_params - Initialize EEPROM params - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure * * Initializes the EEPROM parameters wx_eeprom_info within the * wx_hw struct in order to set up EEPROM access. **/ -void wx_init_eeprom_params(struct wx_hw *wxhw) +void wx_init_eeprom_params(struct wx *wx) { - struct wx_eeprom_info *eeprom = &wxhw->eeprom; + struct wx_eeprom_info *eeprom = &wx->eeprom; u16 eeprom_size; u16 data = 0x80; @@ -475,21 +476,21 @@ void wx_init_eeprom_params(struct wx_hw *wxhw) eeprom->semaphore_delay = 10; eeprom->type = wx_eeprom_none; - if (!(rd32(wxhw, WX_SPI_STATUS) & + if (!(rd32(wx, WX_SPI_STATUS) & WX_SPI_STATUS_FLASH_BYPASS)) { eeprom->type = wx_flash; eeprom_size = 4096; eeprom->word_size = eeprom_size >> 1; - wx_dbg(wxhw, "Eeprom params: type = %d, size = %d\n", + wx_dbg(wx, "Eeprom params: type = %d, size = %d\n", eeprom->type, eeprom->word_size); } } - if (wxhw->mac.type == wx_mac_sp) { - if (wx_read_ee_hostif(wxhw, WX_SW_REGION_PTR, &data)) { - wx_err(wxhw, "NVM Read Error\n"); + if (wx->mac.type == wx_mac_sp) { + if (wx_read_ee_hostif(wx, WX_SW_REGION_PTR, &data)) { + wx_err(wx, "NVM Read Error\n"); return; } data = data >> 1; @@ -501,22 +502,22 @@ EXPORT_SYMBOL(wx_init_eeprom_params); /** * wx_get_mac_addr - Generic get MAC address - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure * @mac_addr: Adapter MAC address * * Reads the adapter's MAC address from first Receive Address Register (RAR0) * A reset of the adapter must be performed prior to calling this function * in order for the MAC address to have been loaded from the EEPROM into RAR0 **/ -void wx_get_mac_addr(struct wx_hw *wxhw, u8 *mac_addr) +void wx_get_mac_addr(struct wx *wx, u8 *mac_addr) { u32 rar_high; u32 rar_low; u16 i; - wr32(wxhw, WX_PSR_MAC_SWC_IDX, 0); - rar_high = rd32(wxhw, WX_PSR_MAC_SWC_AD_H); - rar_low = rd32(wxhw, WX_PSR_MAC_SWC_AD_L); + wr32(wx, WX_PSR_MAC_SWC_IDX, 0); + rar_high = rd32(wx, WX_PSR_MAC_SWC_AD_H); + rar_low = rd32(wx, WX_PSR_MAC_SWC_AD_L); for (i = 0; i < 2; i++) mac_addr[i] = (u8)(rar_high >> (1 - i) * 8); @@ -528,7 +529,7 @@ EXPORT_SYMBOL(wx_get_mac_addr); /** * wx_set_rar - Set Rx address register - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure * @index: Receive address register to write * @addr: Address to put into receive address register * @pools: VMDq "set" or "pool" index @@ -536,25 +537,25 @@ EXPORT_SYMBOL(wx_get_mac_addr); * * Puts an ethernet address into a receive address register. **/ -int wx_set_rar(struct wx_hw *wxhw, u32 index, u8 *addr, u64 pools, - u32 enable_addr) +static int wx_set_rar(struct wx *wx, u32 index, u8 *addr, u64 pools, + u32 enable_addr) { - u32 rar_entries = wxhw->mac.num_rar_entries; + u32 rar_entries = wx->mac.num_rar_entries; u32 rar_low, rar_high; /* Make sure we are using a valid rar index range */ if (index >= rar_entries) { - wx_err(wxhw, "RAR index %d is out of range.\n", index); + wx_err(wx, "RAR index %d is out of range.\n", index); return -EINVAL; } /* select the MAC address */ - wr32(wxhw, WX_PSR_MAC_SWC_IDX, index); + wr32(wx, WX_PSR_MAC_SWC_IDX, index); /* setup VMDq pool mapping */ - wr32(wxhw, WX_PSR_MAC_SWC_VM_L, pools & 0xFFFFFFFF); - if (wxhw->mac.type == wx_mac_sp) - wr32(wxhw, WX_PSR_MAC_SWC_VM_H, pools >> 32); + wr32(wx, WX_PSR_MAC_SWC_VM_L, pools & 0xFFFFFFFF); + if (wx->mac.type == wx_mac_sp) + wr32(wx, WX_PSR_MAC_SWC_VM_H, pools >> 32); /* HW expects these in little endian so we reverse the byte * order from network order (big endian) to little endian @@ -572,8 +573,8 @@ int wx_set_rar(struct wx_hw *wxhw, u32 index, u8 *addr, u64 pools, if (enable_addr != 0) rar_high |= WX_PSR_MAC_SWC_AD_H_AV; - wr32(wxhw, WX_PSR_MAC_SWC_AD_L, rar_low); - wr32m(wxhw, WX_PSR_MAC_SWC_AD_H, + wr32(wx, WX_PSR_MAC_SWC_AD_L, rar_low); + wr32m(wx, WX_PSR_MAC_SWC_AD_H, (WX_PSR_MAC_SWC_AD_H_AD(~0) | WX_PSR_MAC_SWC_AD_H_ADTYPE(~0) | WX_PSR_MAC_SWC_AD_H_AV), @@ -581,22 +582,21 @@ int wx_set_rar(struct wx_hw *wxhw, u32 index, u8 *addr, u64 pools, return 0; } -EXPORT_SYMBOL(wx_set_rar); /** * wx_clear_rar - Remove Rx address register - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure * @index: Receive address register to write * * Clears an ethernet address from a receive address register. **/ -int wx_clear_rar(struct wx_hw *wxhw, u32 index) +static int wx_clear_rar(struct wx *wx, u32 index) { - u32 rar_entries = wxhw->mac.num_rar_entries; + u32 rar_entries = wx->mac.num_rar_entries; /* Make sure we are using a valid rar index range */ if (index >= rar_entries) { - wx_err(wxhw, "RAR index %d is out of range.\n", index); + wx_err(wx, "RAR index %d is out of range.\n", index); return -EINVAL; } @@ -604,13 +604,13 @@ int wx_clear_rar(struct wx_hw *wxhw, u32 index) * so save everything except the lower 16 bits that hold part * of the address and the address valid bit. */ - wr32(wxhw, WX_PSR_MAC_SWC_IDX, index); + wr32(wx, WX_PSR_MAC_SWC_IDX, index); - wr32(wxhw, WX_PSR_MAC_SWC_VM_L, 0); - wr32(wxhw, WX_PSR_MAC_SWC_VM_H, 0); + wr32(wx, WX_PSR_MAC_SWC_VM_L, 0); + wr32(wx, WX_PSR_MAC_SWC_VM_H, 0); - wr32(wxhw, WX_PSR_MAC_SWC_AD_L, 0); - wr32m(wxhw, WX_PSR_MAC_SWC_AD_H, + wr32(wx, WX_PSR_MAC_SWC_AD_L, 0); + wr32m(wx, WX_PSR_MAC_SWC_AD_H, (WX_PSR_MAC_SWC_AD_H_AD(~0) | WX_PSR_MAC_SWC_AD_H_ADTYPE(~0) | WX_PSR_MAC_SWC_AD_H_AV), @@ -618,64 +618,63 @@ int wx_clear_rar(struct wx_hw *wxhw, u32 index) return 0; } -EXPORT_SYMBOL(wx_clear_rar); /** * wx_clear_vmdq - Disassociate a VMDq pool index from a rx address - * @wxhw: pointer to hardware struct + * @wx: pointer to hardware struct * @rar: receive address register index to disassociate * @vmdq: VMDq pool index to remove from the rar **/ -static int wx_clear_vmdq(struct wx_hw *wxhw, u32 rar, u32 __maybe_unused vmdq) +static int wx_clear_vmdq(struct wx *wx, u32 rar, u32 __maybe_unused vmdq) { - u32 rar_entries = wxhw->mac.num_rar_entries; + u32 rar_entries = wx->mac.num_rar_entries; u32 mpsar_lo, mpsar_hi; /* Make sure we are using a valid rar index range */ if (rar >= rar_entries) { - wx_err(wxhw, "RAR index %d is out of range.\n", rar); + wx_err(wx, "RAR index %d is out of range.\n", rar); return -EINVAL; } - wr32(wxhw, WX_PSR_MAC_SWC_IDX, rar); - mpsar_lo = rd32(wxhw, WX_PSR_MAC_SWC_VM_L); - mpsar_hi = rd32(wxhw, WX_PSR_MAC_SWC_VM_H); + wr32(wx, WX_PSR_MAC_SWC_IDX, rar); + mpsar_lo = rd32(wx, WX_PSR_MAC_SWC_VM_L); + mpsar_hi = rd32(wx, WX_PSR_MAC_SWC_VM_H); if (!mpsar_lo && !mpsar_hi) return 0; /* was that the last pool using this rar? */ if (mpsar_lo == 0 && mpsar_hi == 0 && rar != 0) - wx_clear_rar(wxhw, rar); + wx_clear_rar(wx, rar); return 0; } /** * wx_init_uta_tables - Initialize the Unicast Table Array - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure **/ -static void wx_init_uta_tables(struct wx_hw *wxhw) +static void wx_init_uta_tables(struct wx *wx) { int i; - wx_dbg(wxhw, " Clearing UTA\n"); + wx_dbg(wx, " Clearing UTA\n"); for (i = 0; i < 128; i++) - wr32(wxhw, WX_PSR_UC_TBL(i), 0); + wr32(wx, WX_PSR_UC_TBL(i), 0); } /** * wx_init_rx_addrs - Initializes receive address filters. - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure * * Places the MAC address in receive address register 0 and clears the rest * of the receive address registers. Clears the multicast table. Assumes * the receiver is in reset when the routine is called. **/ -void wx_init_rx_addrs(struct wx_hw *wxhw) +void wx_init_rx_addrs(struct wx *wx) { - u32 rar_entries = wxhw->mac.num_rar_entries; + u32 rar_entries = wx->mac.num_rar_entries; u32 psrctl; int i; @@ -683,67 +682,166 @@ void wx_init_rx_addrs(struct wx_hw *wxhw) * to the permanent address. * Otherwise, use the permanent address from the eeprom. */ - if (!is_valid_ether_addr(wxhw->mac.addr)) { + if (!is_valid_ether_addr(wx->mac.addr)) { /* Get the MAC address from the RAR0 for later reference */ - wx_get_mac_addr(wxhw, wxhw->mac.addr); - wx_dbg(wxhw, "Keeping Current RAR0 Addr = %pM\n", wxhw->mac.addr); + wx_get_mac_addr(wx, wx->mac.addr); + wx_dbg(wx, "Keeping Current RAR0 Addr = %pM\n", wx->mac.addr); } else { /* Setup the receive address. */ - wx_dbg(wxhw, "Overriding MAC Address in RAR[0]\n"); - wx_dbg(wxhw, "New MAC Addr = %pM\n", wxhw->mac.addr); + wx_dbg(wx, "Overriding MAC Address in RAR[0]\n"); + wx_dbg(wx, "New MAC Addr = %pM\n", wx->mac.addr); - wx_set_rar(wxhw, 0, wxhw->mac.addr, 0, WX_PSR_MAC_SWC_AD_H_AV); + wx_set_rar(wx, 0, wx->mac.addr, 0, WX_PSR_MAC_SWC_AD_H_AV); - if (wxhw->mac.type == wx_mac_sp) { + if (wx->mac.type == wx_mac_sp) { /* clear VMDq pool/queue selection for RAR 0 */ - wx_clear_vmdq(wxhw, 0, WX_CLEAR_VMDQ_ALL); + wx_clear_vmdq(wx, 0, WX_CLEAR_VMDQ_ALL); } } /* Zero out the other receive addresses. */ - wx_dbg(wxhw, "Clearing RAR[1-%d]\n", rar_entries - 1); + wx_dbg(wx, "Clearing RAR[1-%d]\n", rar_entries - 1); for (i = 1; i < rar_entries; i++) { - wr32(wxhw, WX_PSR_MAC_SWC_IDX, i); - wr32(wxhw, WX_PSR_MAC_SWC_AD_L, 0); - wr32(wxhw, WX_PSR_MAC_SWC_AD_H, 0); + wr32(wx, WX_PSR_MAC_SWC_IDX, i); + wr32(wx, WX_PSR_MAC_SWC_AD_L, 0); + wr32(wx, WX_PSR_MAC_SWC_AD_H, 0); } /* Clear the MTA */ - wxhw->addr_ctrl.mta_in_use = 0; - psrctl = rd32(wxhw, WX_PSR_CTL); + wx->addr_ctrl.mta_in_use = 0; + psrctl = rd32(wx, WX_PSR_CTL); psrctl &= ~(WX_PSR_CTL_MO | WX_PSR_CTL_MFE); - psrctl |= wxhw->mac.mc_filter_type << WX_PSR_CTL_MO_SHIFT; - wr32(wxhw, WX_PSR_CTL, psrctl); - wx_dbg(wxhw, " Clearing MTA\n"); - for (i = 0; i < wxhw->mac.mcft_size; i++) - wr32(wxhw, WX_PSR_MC_TBL(i), 0); + psrctl |= wx->mac.mc_filter_type << WX_PSR_CTL_MO_SHIFT; + wr32(wx, WX_PSR_CTL, psrctl); + wx_dbg(wx, " Clearing MTA\n"); + for (i = 0; i < wx->mac.mcft_size; i++) + wr32(wx, WX_PSR_MC_TBL(i), 0); - wx_init_uta_tables(wxhw); + wx_init_uta_tables(wx); } EXPORT_SYMBOL(wx_init_rx_addrs); -void wx_disable_rx(struct wx_hw *wxhw) +static void wx_sync_mac_table(struct wx *wx) +{ + int i; + + for (i = 0; i < wx->mac.num_rar_entries; i++) { + if (wx->mac_table[i].state & WX_MAC_STATE_MODIFIED) { + if (wx->mac_table[i].state & WX_MAC_STATE_IN_USE) { + wx_set_rar(wx, i, + wx->mac_table[i].addr, + wx->mac_table[i].pools, + WX_PSR_MAC_SWC_AD_H_AV); + } else { + wx_clear_rar(wx, i); + } + wx->mac_table[i].state &= ~(WX_MAC_STATE_MODIFIED); + } + } +} + +/* this function destroys the first RAR entry */ +void wx_mac_set_default_filter(struct wx *wx, u8 *addr) +{ + memcpy(&wx->mac_table[0].addr, addr, ETH_ALEN); + wx->mac_table[0].pools = 1ULL; + wx->mac_table[0].state = (WX_MAC_STATE_DEFAULT | WX_MAC_STATE_IN_USE); + wx_set_rar(wx, 0, wx->mac_table[0].addr, + wx->mac_table[0].pools, + WX_PSR_MAC_SWC_AD_H_AV); +} +EXPORT_SYMBOL(wx_mac_set_default_filter); + +void wx_flush_sw_mac_table(struct wx *wx) +{ + u32 i; + + for (i = 0; i < wx->mac.num_rar_entries; i++) { + if (!(wx->mac_table[i].state & WX_MAC_STATE_IN_USE)) + continue; + + wx->mac_table[i].state |= WX_MAC_STATE_MODIFIED; + wx->mac_table[i].state &= ~WX_MAC_STATE_IN_USE; + memset(wx->mac_table[i].addr, 0, ETH_ALEN); + wx->mac_table[i].pools = 0; + } + wx_sync_mac_table(wx); +} +EXPORT_SYMBOL(wx_flush_sw_mac_table); + +static int wx_del_mac_filter(struct wx *wx, u8 *addr, u16 pool) +{ + u32 i; + + if (is_zero_ether_addr(addr)) + return -EINVAL; + + /* search table for addr, if found, set to 0 and sync */ + for (i = 0; i < wx->mac.num_rar_entries; i++) { + if (!ether_addr_equal(addr, wx->mac_table[i].addr)) + continue; + + wx->mac_table[i].state |= WX_MAC_STATE_MODIFIED; + wx->mac_table[i].pools &= ~(1ULL << pool); + if (!wx->mac_table[i].pools) { + wx->mac_table[i].state &= ~WX_MAC_STATE_IN_USE; + memset(wx->mac_table[i].addr, 0, ETH_ALEN); + } + wx_sync_mac_table(wx); + return 0; + } + return -ENOMEM; +} + +/** + * wx_set_mac - Change the Ethernet Address of the NIC + * @netdev: network interface device structure + * @p: pointer to an address structure + * + * Returns 0 on success, negative on failure + **/ +int wx_set_mac(struct net_device *netdev, void *p) +{ + struct wx *wx = netdev_priv(netdev); + struct sockaddr *addr = p; + int retval; + + retval = eth_prepare_mac_addr_change(netdev, addr); + if (retval) + return retval; + + wx_del_mac_filter(wx, wx->mac.addr, 0); + eth_hw_addr_set(netdev, addr->sa_data); + memcpy(wx->mac.addr, addr->sa_data, netdev->addr_len); + + wx_mac_set_default_filter(wx, wx->mac.addr); + + return 0; +} +EXPORT_SYMBOL(wx_set_mac); + +void wx_disable_rx(struct wx *wx) { u32 pfdtxgswc; u32 rxctrl; - rxctrl = rd32(wxhw, WX_RDB_PB_CTL); + rxctrl = rd32(wx, WX_RDB_PB_CTL); if (rxctrl & WX_RDB_PB_CTL_RXEN) { - pfdtxgswc = rd32(wxhw, WX_PSR_CTL); + pfdtxgswc = rd32(wx, WX_PSR_CTL); if (pfdtxgswc & WX_PSR_CTL_SW_EN) { pfdtxgswc &= ~WX_PSR_CTL_SW_EN; - wr32(wxhw, WX_PSR_CTL, pfdtxgswc); - wxhw->mac.set_lben = true; + wr32(wx, WX_PSR_CTL, pfdtxgswc); + wx->mac.set_lben = true; } else { - wxhw->mac.set_lben = false; + wx->mac.set_lben = false; } rxctrl &= ~WX_RDB_PB_CTL_RXEN; - wr32(wxhw, WX_RDB_PB_CTL, rxctrl); + wr32(wx, WX_RDB_PB_CTL, rxctrl); - if (!(((wxhw->subsystem_device_id & WX_NCSI_MASK) == WX_NCSI_SUP) || - ((wxhw->subsystem_device_id & WX_WOL_MASK) == WX_WOL_SUP))) { + if (!(((wx->subsystem_device_id & WX_NCSI_MASK) == WX_NCSI_SUP) || + ((wx->subsystem_device_id & WX_WOL_MASK) == WX_WOL_SUP))) { /* disable mac receiver */ - wr32m(wxhw, WX_MAC_RX_CFG, + wr32m(wx, WX_MAC_RX_CFG, WX_MAC_RX_CFG_RE, 0); } } @@ -752,28 +850,28 @@ EXPORT_SYMBOL(wx_disable_rx); /** * wx_disable_pcie_master - Disable PCI-express master access - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure * * Disables PCI-Express master access and verifies there are no pending * requests. **/ -int wx_disable_pcie_master(struct wx_hw *wxhw) +int wx_disable_pcie_master(struct wx *wx) { int status = 0; u32 val; /* Always set this bit to ensure any future transactions are blocked */ - pci_clear_master(wxhw->pdev); + pci_clear_master(wx->pdev); /* Exit if master requests are blocked */ - if (!(rd32(wxhw, WX_PX_TRANSACTION_PENDING))) + if (!(rd32(wx, WX_PX_TRANSACTION_PENDING))) return 0; /* Poll for master request bit to clear */ status = read_poll_timeout(rd32, val, !val, 100, WX_PCI_MASTER_DISABLE_TIMEOUT, - false, wxhw, WX_PX_TRANSACTION_PENDING); + false, wx, WX_PX_TRANSACTION_PENDING); if (status < 0) - wx_err(wxhw, "PCIe transaction pending bit did not clear.\n"); + wx_err(wx, "PCIe transaction pending bit did not clear.\n"); return status; } @@ -781,106 +879,106 @@ EXPORT_SYMBOL(wx_disable_pcie_master); /** * wx_stop_adapter - Generic stop Tx/Rx units - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure * * Sets the adapter_stopped flag within wx_hw struct. Clears interrupts, * disables transmit and receive units. The adapter_stopped flag is used by * the shared code and drivers to determine if the adapter is in a stopped * state and should not touch the hardware. **/ -int wx_stop_adapter(struct wx_hw *wxhw) +int wx_stop_adapter(struct wx *wx) { u16 i; /* Set the adapter_stopped flag so other driver functions stop touching * the hardware */ - wxhw->adapter_stopped = true; + wx->adapter_stopped = true; /* Disable the receive unit */ - wx_disable_rx(wxhw); + wx_disable_rx(wx); /* Set interrupt mask to stop interrupts from being generated */ - wx_intr_disable(wxhw, WX_INTR_ALL); + wx_intr_disable(wx, WX_INTR_ALL); /* Clear any pending interrupts, flush previous writes */ - wr32(wxhw, WX_PX_MISC_IC, 0xffffffff); - wr32(wxhw, WX_BME_CTL, 0x3); + wr32(wx, WX_PX_MISC_IC, 0xffffffff); + wr32(wx, WX_BME_CTL, 0x3); /* Disable the transmit unit. Each queue must be disabled. */ - for (i = 0; i < wxhw->mac.max_tx_queues; i++) { - wr32m(wxhw, WX_PX_TR_CFG(i), + for (i = 0; i < wx->mac.max_tx_queues; i++) { + wr32m(wx, WX_PX_TR_CFG(i), WX_PX_TR_CFG_SWFLSH | WX_PX_TR_CFG_ENABLE, WX_PX_TR_CFG_SWFLSH); } /* Disable the receive unit by stopping each queue */ - for (i = 0; i < wxhw->mac.max_rx_queues; i++) { - wr32m(wxhw, WX_PX_RR_CFG(i), + for (i = 0; i < wx->mac.max_rx_queues; i++) { + wr32m(wx, WX_PX_RR_CFG(i), WX_PX_RR_CFG_RR_EN, 0); } /* flush all queues disables */ - WX_WRITE_FLUSH(wxhw); + WX_WRITE_FLUSH(wx); /* Prevent the PCI-E bus from hanging by disabling PCI-E master * access and verify no pending requests */ - return wx_disable_pcie_master(wxhw); + return wx_disable_pcie_master(wx); } EXPORT_SYMBOL(wx_stop_adapter); -void wx_reset_misc(struct wx_hw *wxhw) +void wx_reset_misc(struct wx *wx) { int i; /* receive packets that size > 2048 */ - wr32m(wxhw, WX_MAC_RX_CFG, WX_MAC_RX_CFG_JE, WX_MAC_RX_CFG_JE); + wr32m(wx, WX_MAC_RX_CFG, WX_MAC_RX_CFG_JE, WX_MAC_RX_CFG_JE); /* clear counters on read */ - wr32m(wxhw, WX_MMC_CONTROL, + wr32m(wx, WX_MMC_CONTROL, WX_MMC_CONTROL_RSTONRD, WX_MMC_CONTROL_RSTONRD); - wr32m(wxhw, WX_MAC_RX_FLOW_CTRL, + wr32m(wx, WX_MAC_RX_FLOW_CTRL, WX_MAC_RX_FLOW_CTRL_RFE, WX_MAC_RX_FLOW_CTRL_RFE); - wr32(wxhw, WX_MAC_PKT_FLT, WX_MAC_PKT_FLT_PR); + wr32(wx, WX_MAC_PKT_FLT, WX_MAC_PKT_FLT_PR); - wr32m(wxhw, WX_MIS_RST_ST, + wr32m(wx, WX_MIS_RST_ST, WX_MIS_RST_ST_RST_INIT, 0x1E00); /* errata 4: initialize mng flex tbl and wakeup flex tbl*/ - wr32(wxhw, WX_PSR_MNG_FLEX_SEL, 0); + wr32(wx, WX_PSR_MNG_FLEX_SEL, 0); for (i = 0; i < 16; i++) { - wr32(wxhw, WX_PSR_MNG_FLEX_DW_L(i), 0); - wr32(wxhw, WX_PSR_MNG_FLEX_DW_H(i), 0); - wr32(wxhw, WX_PSR_MNG_FLEX_MSK(i), 0); + wr32(wx, WX_PSR_MNG_FLEX_DW_L(i), 0); + wr32(wx, WX_PSR_MNG_FLEX_DW_H(i), 0); + wr32(wx, WX_PSR_MNG_FLEX_MSK(i), 0); } - wr32(wxhw, WX_PSR_LAN_FLEX_SEL, 0); + wr32(wx, WX_PSR_LAN_FLEX_SEL, 0); for (i = 0; i < 16; i++) { - wr32(wxhw, WX_PSR_LAN_FLEX_DW_L(i), 0); - wr32(wxhw, WX_PSR_LAN_FLEX_DW_H(i), 0); - wr32(wxhw, WX_PSR_LAN_FLEX_MSK(i), 0); + wr32(wx, WX_PSR_LAN_FLEX_DW_L(i), 0); + wr32(wx, WX_PSR_LAN_FLEX_DW_H(i), 0); + wr32(wx, WX_PSR_LAN_FLEX_MSK(i), 0); } /* set pause frame dst mac addr */ - wr32(wxhw, WX_RDB_PFCMACDAL, 0xC2000001); - wr32(wxhw, WX_RDB_PFCMACDAH, 0x0180); + wr32(wx, WX_RDB_PFCMACDAL, 0xC2000001); + wr32(wx, WX_RDB_PFCMACDAH, 0x0180); } EXPORT_SYMBOL(wx_reset_misc); /** * wx_get_pcie_msix_counts - Gets MSI-X vector count - * @wxhw: pointer to hardware structure + * @wx: pointer to hardware structure * @msix_count: number of MSI interrupts that can be obtained * @max_msix_count: number of MSI interrupts that mac need * * Read PCIe configuration space, and get the MSI-X vector count from * the capabilities table. **/ -int wx_get_pcie_msix_counts(struct wx_hw *wxhw, u16 *msix_count, u16 max_msix_count) +int wx_get_pcie_msix_counts(struct wx *wx, u16 *msix_count, u16 max_msix_count) { - struct pci_dev *pdev = wxhw->pdev; + struct pci_dev *pdev = wx->pdev; struct device *dev = &pdev->dev; int pos; @@ -904,31 +1002,39 @@ int wx_get_pcie_msix_counts(struct wx_hw *wxhw, u16 *msix_count, u16 max_msix_co } EXPORT_SYMBOL(wx_get_pcie_msix_counts); -int wx_sw_init(struct wx_hw *wxhw) +int wx_sw_init(struct wx *wx) { - struct pci_dev *pdev = wxhw->pdev; + struct pci_dev *pdev = wx->pdev; u32 ssid = 0; int err = 0; - wxhw->vendor_id = pdev->vendor; - wxhw->device_id = pdev->device; - wxhw->revision_id = pdev->revision; - wxhw->oem_svid = pdev->subsystem_vendor; - wxhw->oem_ssid = pdev->subsystem_device; - wxhw->bus.device = PCI_SLOT(pdev->devfn); - wxhw->bus.func = PCI_FUNC(pdev->devfn); - - if (wxhw->oem_svid == PCI_VENDOR_ID_WANGXUN) { - wxhw->subsystem_vendor_id = pdev->subsystem_vendor; - wxhw->subsystem_device_id = pdev->subsystem_device; + wx->vendor_id = pdev->vendor; + wx->device_id = pdev->device; + wx->revision_id = pdev->revision; + wx->oem_svid = pdev->subsystem_vendor; + wx->oem_ssid = pdev->subsystem_device; + wx->bus.device = PCI_SLOT(pdev->devfn); + wx->bus.func = PCI_FUNC(pdev->devfn); + + if (wx->oem_svid == PCI_VENDOR_ID_WANGXUN) { + wx->subsystem_vendor_id = pdev->subsystem_vendor; + wx->subsystem_device_id = pdev->subsystem_device; } else { - err = wx_flash_read_dword(wxhw, 0xfffdc, &ssid); + err = wx_flash_read_dword(wx, 0xfffdc, &ssid); if (!err) - wxhw->subsystem_device_id = swab16((u16)ssid); + wx->subsystem_device_id = swab16((u16)ssid); return err; } + wx->mac_table = kcalloc(wx->mac.num_rar_entries, + sizeof(struct wx_mac_addr), + GFP_KERNEL); + if (!wx->mac_table) { + wx_err(wx, "mac_table allocation failed\n"); + return -ENOMEM; + } + return 0; } EXPORT_SYMBOL(wx_sw_init); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h index a0652f5e9939..803983546f3a 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h @@ -4,25 +4,26 @@ #ifndef _WX_HW_H_ #define _WX_HW_H_ -int wx_check_flash_load(struct wx_hw *hw, u32 check_bit); -void wx_control_hw(struct wx_hw *wxhw, bool drv); -int wx_mng_present(struct wx_hw *wxhw); -int wx_host_interface_command(struct wx_hw *wxhw, u32 *buffer, +int wx_check_flash_load(struct wx *wx, u32 check_bit); +void wx_control_hw(struct wx *wx, bool drv); +int wx_mng_present(struct wx *wx); +int wx_host_interface_command(struct wx *wx, u32 *buffer, u32 length, u32 timeout, bool return_data); -int wx_read_ee_hostif(struct wx_hw *wxhw, u16 offset, u16 *data); -int wx_read_ee_hostif_buffer(struct wx_hw *wxhw, +int wx_read_ee_hostif(struct wx *wx, u16 offset, u16 *data); +int wx_read_ee_hostif_buffer(struct wx *wx, u16 offset, u16 words, u16 *data); -int wx_reset_hostif(struct wx_hw *wxhw); -void wx_init_eeprom_params(struct wx_hw *wxhw); -void wx_get_mac_addr(struct wx_hw *wxhw, u8 *mac_addr); -int wx_set_rar(struct wx_hw *wxhw, u32 index, u8 *addr, u64 pools, u32 enable_addr); -int wx_clear_rar(struct wx_hw *wxhw, u32 index); -void wx_init_rx_addrs(struct wx_hw *wxhw); -void wx_disable_rx(struct wx_hw *wxhw); -int wx_disable_pcie_master(struct wx_hw *wxhw); -int wx_stop_adapter(struct wx_hw *wxhw); -void wx_reset_misc(struct wx_hw *wxhw); -int wx_get_pcie_msix_counts(struct wx_hw *wxhw, u16 *msix_count, u16 max_msix_count); -int wx_sw_init(struct wx_hw *wxhw); +int wx_reset_hostif(struct wx *wx); +void wx_init_eeprom_params(struct wx *wx); +void wx_get_mac_addr(struct wx *wx, u8 *mac_addr); +void wx_init_rx_addrs(struct wx *wx); +void wx_mac_set_default_filter(struct wx *wx, u8 *addr); +void wx_flush_sw_mac_table(struct wx *wx); +int wx_set_mac(struct net_device *netdev, void *p); +void wx_disable_rx(struct wx *wx); +int wx_disable_pcie_master(struct wx *wx); +int wx_stop_adapter(struct wx *wx); +void wx_reset_misc(struct wx *wx); +int wx_get_pcie_msix_counts(struct wx *wx, u16 *msix_count, u16 max_msix_count); +int wx_sw_init(struct wx *wx); #endif /* _WX_HW_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 1cbeef8230bf..a52908d01c9c 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -185,6 +185,12 @@ #define WX_SW_REGION_PTR 0x1C +#define WX_MAC_STATE_DEFAULT 0x1 +#define WX_MAC_STATE_MODIFIED 0x2 +#define WX_MAC_STATE_IN_USE 0x4 + +#define WX_CFG_PORT_ST 0x14404 + /* Host Interface Command Structures */ struct wx_hic_hdr { u8 cmd; @@ -249,6 +255,12 @@ enum wx_mac_type { wx_mac_em }; +enum em_mac_type { + em_mac_type_unknown = 0, + em_mac_type_mdi, + em_mac_type_rgmii +}; + struct wx_mac_info { enum wx_mac_type type; bool set_lben; @@ -284,19 +296,28 @@ struct wx_addr_filter_info { bool user_set_promisc; }; +struct wx_mac_addr { + u8 addr[ETH_ALEN]; + u16 state; /* bitmask */ + u64 pools; +}; + enum wx_reset_type { WX_LAN_RESET = 0, WX_SW_RESET, WX_GLOBAL_RESET }; -struct wx_hw { +struct wx { u8 __iomem *hw_addr; struct pci_dev *pdev; + struct net_device *netdev; struct wx_bus_info bus; struct wx_mac_info mac; + enum em_mac_type mac_type; struct wx_eeprom_info eeprom; struct wx_addr_filter_info addr_ctrl; + struct wx_mac_addr *mac_table; u16 device_id; u16 vendor_id; u16 subsystem_device_id; @@ -304,8 +325,39 @@ struct wx_hw { u8 revision_id; u16 oem_ssid; u16 oem_svid; + u16 msg_enable; bool adapter_stopped; + char eeprom_id[32]; enum wx_reset_type reset_type; + + bool wol_enabled; + bool ncsi_enabled; + bool gpio_ctrl; + + /* Tx fast path data */ + int num_tx_queues; + u16 tx_itr_setting; + u16 tx_work_limit; + + /* Rx fast path data */ + int num_rx_queues; + u16 rx_itr_setting; + u16 rx_work_limit; + + int num_q_vectors; /* current number of q_vectors for device */ + int max_q_vectors; /* upper limit of q_vectors for device */ + + u32 tx_ring_count; + u32 rx_ring_count; + +#define WX_MAX_RETA_ENTRIES 128 + u8 rss_indir_tbl[WX_MAX_RETA_ENTRIES]; + +#define WX_RSS_KEY_SIZE 40 /* size of RSS Hash Key in bytes */ + u32 *rss_key; + u32 wol; + + u16 bd_number; }; #define WX_INTR_ALL (~0ULL) @@ -319,23 +371,23 @@ struct wx_hw { wr32((a), (reg) + ((off) << 2), (val)) static inline u32 -rd32m(struct wx_hw *wxhw, u32 reg, u32 mask) +rd32m(struct wx *wx, u32 reg, u32 mask) { u32 val; - val = rd32(wxhw, reg); + val = rd32(wx, reg); return val & mask; } static inline void -wr32m(struct wx_hw *wxhw, u32 reg, u32 mask, u32 field) +wr32m(struct wx *wx, u32 reg, u32 mask, u32 field) { u32 val; - val = rd32(wxhw, reg); + val = rd32(wx, reg); val = ((val & ~mask) | (field & mask)); - wr32(wxhw, reg, val); + wr32(wx, reg, val); } /* On some domestic CPU platforms, sometimes IO is not synchronized with @@ -343,10 +395,10 @@ wr32m(struct wx_hw *wxhw, u32 reg, u32 mask, u32 field) */ #define WX_WRITE_FLUSH(H) rd32(H, WX_MIS_PWR) -#define wx_err(wxhw, fmt, arg...) \ - dev_err(&(wxhw)->pdev->dev, fmt, ##arg) +#define wx_err(wx, fmt, arg...) \ + dev_err(&(wx)->pdev->dev, fmt, ##arg) -#define wx_dbg(wxhw, fmt, arg...) \ - dev_dbg(&(wxhw)->pdev->dev, fmt, ##arg) +#define wx_dbg(wx, fmt, arg...) \ + dev_dbg(&(wx)->pdev->dev, fmt, ##arg) #endif /* _WX_TYPE_H_ */ diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe.h b/drivers/net/ethernet/wangxun/ngbe/ngbe.h deleted file mode 100644 index af147ca8605c..000000000000 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe.h +++ /dev/null @@ -1,79 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2019 - 2022 Beijing WangXun Technology Co., Ltd. */ - -#ifndef _NGBE_H_ -#define _NGBE_H_ - -#include "ngbe_type.h" - -#define NGBE_MAX_FDIR_INDICES 7 - -#define NGBE_MAX_RX_QUEUES (NGBE_MAX_FDIR_INDICES + 1) -#define NGBE_MAX_TX_QUEUES (NGBE_MAX_FDIR_INDICES + 1) - -#define NGBE_ETH_LENGTH_OF_ADDRESS 6 -#define NGBE_MAX_MSIX_VECTORS 0x09 -#define NGBE_RAR_ENTRIES 32 - -/* TX/RX descriptor defines */ -#define NGBE_DEFAULT_TXD 512 /* default ring size */ -#define NGBE_DEFAULT_TX_WORK 256 -#define NGBE_MAX_TXD 8192 -#define NGBE_MIN_TXD 128 - -#define NGBE_DEFAULT_RXD 512 /* default ring size */ -#define NGBE_DEFAULT_RX_WORK 256 -#define NGBE_MAX_RXD 8192 -#define NGBE_MIN_RXD 128 - -#define NGBE_MAC_STATE_DEFAULT 0x1 -#define NGBE_MAC_STATE_MODIFIED 0x2 -#define NGBE_MAC_STATE_IN_USE 0x4 - -struct ngbe_mac_addr { - u8 addr[ETH_ALEN]; - u16 state; /* bitmask */ - u64 pools; -}; - -/* board specific private data structure */ -struct ngbe_adapter { - u8 __iomem *io_addr; /* Mainly for iounmap use */ - /* OS defined structs */ - struct net_device *netdev; - struct pci_dev *pdev; - - /* structs defined in ngbe_hw.h */ - struct ngbe_hw hw; - struct ngbe_mac_addr *mac_table; - u16 msg_enable; - - /* Tx fast path data */ - int num_tx_queues; - u16 tx_itr_setting; - u16 tx_work_limit; - - /* Rx fast path data */ - int num_rx_queues; - u16 rx_itr_setting; - u16 rx_work_limit; - - int num_q_vectors; /* current number of q_vectors for device */ - int max_q_vectors; /* upper limit of q_vectors for device */ - - u32 tx_ring_count; - u32 rx_ring_count; - -#define NGBE_MAX_RETA_ENTRIES 128 - u8 rss_indir_tbl[NGBE_MAX_RETA_ENTRIES]; - -#define NGBE_RSS_KEY_SIZE 40 /* size of RSS Hash Key in bytes */ - u32 *rss_key; - u32 wol; - - u16 bd_number; -}; - -extern char ngbe_driver_name[]; - -#endif /* _NGBE_H_ */ diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_hw.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_hw.c index 0e3923b3737e..588de24b5e18 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_hw.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_hw.c @@ -9,12 +9,10 @@ #include "../libwx/wx_hw.h" #include "ngbe_type.h" #include "ngbe_hw.h" -#include "ngbe.h" -int ngbe_eeprom_chksum_hostif(struct ngbe_hw *hw) +int ngbe_eeprom_chksum_hostif(struct wx *wx) { struct wx_hic_read_shadow_ram buffer; - struct wx_hw *wxhw = &hw->wxhw; int status; int tmp; @@ -27,61 +25,58 @@ int ngbe_eeprom_chksum_hostif(struct ngbe_hw *hw) /* one word */ buffer.length = 0; - status = wx_host_interface_command(wxhw, (u32 *)&buffer, sizeof(buffer), + status = wx_host_interface_command(wx, (u32 *)&buffer, sizeof(buffer), WX_HI_COMMAND_TIMEOUT, false); if (status < 0) return status; - tmp = rd32a(wxhw, WX_MNG_MBOX, 1); + tmp = rd32a(wx, WX_MNG_MBOX, 1); if (tmp == NGBE_FW_CMD_ST_PASS) return 0; return -EIO; } -static int ngbe_reset_misc(struct ngbe_hw *hw) +static int ngbe_reset_misc(struct wx *wx) { - struct wx_hw *wxhw = &hw->wxhw; - - wx_reset_misc(wxhw); - if (hw->mac_type == ngbe_mac_type_rgmii) - wr32(wxhw, NGBE_MDIO_CLAUSE_SELECT, 0xF); - if (hw->gpio_ctrl) { + wx_reset_misc(wx); + if (wx->mac_type == em_mac_type_rgmii) + wr32(wx, NGBE_MDIO_CLAUSE_SELECT, 0xF); + if (wx->gpio_ctrl) { /* gpio0 is used to power on/off control*/ - wr32(wxhw, NGBE_GPIO_DDR, 0x1); - wr32(wxhw, NGBE_GPIO_DR, NGBE_GPIO_DR_0); + wr32(wx, NGBE_GPIO_DDR, 0x1); + wr32(wx, NGBE_GPIO_DR, NGBE_GPIO_DR_0); } return 0; } /** * ngbe_reset_hw - Perform hardware reset - * @hw: pointer to hardware structure + * @wx: pointer to hardware structure * * Resets the hardware by resetting the transmit and receive units, masks * and clears all interrupts, perform a PHY reset, and perform a link (MAC) * reset. **/ -int ngbe_reset_hw(struct ngbe_hw *hw) +int ngbe_reset_hw(struct wx *wx) { - struct wx_hw *wxhw = &hw->wxhw; int status = 0; u32 reset = 0; - /* Call adapter stop to disable tx/rx and clear interrupts */ - status = wx_stop_adapter(wxhw); + /* Call wx stop to disable tx/rx and clear interrupts */ + status = wx_stop_adapter(wx); if (status != 0) return status; - reset = WX_MIS_RST_LAN_RST(wxhw->bus.func); - wr32(wxhw, WX_MIS_RST, reset | rd32(wxhw, WX_MIS_RST)); - ngbe_reset_misc(hw); + reset = WX_MIS_RST_LAN_RST(wx->bus.func); + wr32(wx, WX_MIS_RST, reset | rd32(wx, WX_MIS_RST)); + ngbe_reset_misc(wx); /* Store the permanent mac address */ - wx_get_mac_addr(wxhw, wxhw->mac.perm_addr); + wx_get_mac_addr(wx, wx->mac.perm_addr); /* reset num_rar_entries to 128 */ - wxhw->mac.num_rar_entries = NGBE_RAR_ENTRIES; - wx_init_rx_addrs(wxhw); - pci_set_master(wxhw->pdev); + wx->mac.num_rar_entries = NGBE_RAR_ENTRIES; + wx_init_rx_addrs(wx); + pci_set_master(wx->pdev); return 0; } diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_hw.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_hw.h index 42476a3fe57c..8d14d51c0a90 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_hw.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_hw.h @@ -7,6 +7,6 @@ #ifndef _NGBE_HW_H_ #define _NGBE_HW_H_ -int ngbe_eeprom_chksum_hostif(struct ngbe_hw *hw); -int ngbe_reset_hw(struct ngbe_hw *hw); +int ngbe_eeprom_chksum_hostif(struct wx *wx); +int ngbe_reset_hw(struct wx *wx); #endif /* _NGBE_HW_H_ */ diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index f0b24366da18..f66513ddf6d9 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -14,7 +14,7 @@ #include "../libwx/wx_hw.h" #include "ngbe_type.h" #include "ngbe_hw.h" -#include "ngbe.h" + char ngbe_driver_name[] = "ngbe"; /* ngbe_pci_tbl - PCI Device ID Table @@ -39,70 +39,27 @@ static const struct pci_device_id ngbe_pci_tbl[] = { { .device = 0 } }; -static void ngbe_mac_set_default_filter(struct ngbe_adapter *adapter, u8 *addr) -{ - struct ngbe_hw *hw = &adapter->hw; - - memcpy(&adapter->mac_table[0].addr, addr, ETH_ALEN); - adapter->mac_table[0].pools = 1ULL; - adapter->mac_table[0].state = (NGBE_MAC_STATE_DEFAULT | - NGBE_MAC_STATE_IN_USE); - wx_set_rar(&hw->wxhw, 0, adapter->mac_table[0].addr, - adapter->mac_table[0].pools, - WX_PSR_MAC_SWC_AD_H_AV); -} - /** * ngbe_init_type_code - Initialize the shared code - * @hw: pointer to hardware structure + * @wx: pointer to hardware structure **/ -static void ngbe_init_type_code(struct ngbe_hw *hw) +static void ngbe_init_type_code(struct wx *wx) { int wol_mask = 0, ncsi_mask = 0; - struct wx_hw *wxhw = &hw->wxhw; - u16 type_mask = 0; + u16 type_mask = 0, val; - wxhw->mac.type = wx_mac_em; - type_mask = (u16)(wxhw->subsystem_device_id & NGBE_OEM_MASK); - ncsi_mask = wxhw->subsystem_device_id & NGBE_NCSI_MASK; - wol_mask = wxhw->subsystem_device_id & NGBE_WOL_MASK; + wx->mac.type = wx_mac_em; + type_mask = (u16)(wx->subsystem_device_id & NGBE_OEM_MASK); + ncsi_mask = wx->subsystem_device_id & NGBE_NCSI_MASK; + wol_mask = wx->subsystem_device_id & NGBE_WOL_MASK; - switch (type_mask) { - case NGBE_SUBID_M88E1512_SFP: - case NGBE_SUBID_LY_M88E1512_SFP: - hw->phy.type = ngbe_phy_m88e1512_sfi; - break; - case NGBE_SUBID_M88E1512_RJ45: - hw->phy.type = ngbe_phy_m88e1512; - break; - case NGBE_SUBID_M88E1512_MIX: - hw->phy.type = ngbe_phy_m88e1512_unknown; - break; - case NGBE_SUBID_YT8521S_SFP: - case NGBE_SUBID_YT8521S_SFP_GPIO: - case NGBE_SUBID_LY_YT8521S_SFP: - hw->phy.type = ngbe_phy_yt8521s_sfi; - break; - case NGBE_SUBID_INTERNAL_YT8521S_SFP: - case NGBE_SUBID_INTERNAL_YT8521S_SFP_GPIO: - hw->phy.type = ngbe_phy_internal_yt8521s_sfi; - break; - case NGBE_SUBID_RGMII_FPGA: - case NGBE_SUBID_OCP_CARD: - fallthrough; - default: - hw->phy.type = ngbe_phy_internal; - break; - } + val = rd32(wx, WX_CFG_PORT_ST); + wx->mac_type = (val & BIT(7)) >> 7 ? + em_mac_type_rgmii : + em_mac_type_mdi; - if (hw->phy.type == ngbe_phy_internal || - hw->phy.type == ngbe_phy_internal_yt8521s_sfi) - hw->mac_type = ngbe_mac_type_mdi; - else - hw->mac_type = ngbe_mac_type_rgmii; - - hw->wol_enabled = (wol_mask == NGBE_WOL_SUP) ? 1 : 0; - hw->ncsi_enabled = (ncsi_mask == NGBE_NCSI_MASK || + wx->wol_enabled = (wol_mask == NGBE_WOL_SUP) ? 1 : 0; + wx->ncsi_enabled = (ncsi_mask == NGBE_NCSI_MASK || type_mask == NGBE_SUBID_OCP_CARD) ? 1 : 0; switch (type_mask) { @@ -110,31 +67,31 @@ static void ngbe_init_type_code(struct ngbe_hw *hw) case NGBE_SUBID_LY_M88E1512_SFP: case NGBE_SUBID_YT8521S_SFP_GPIO: case NGBE_SUBID_INTERNAL_YT8521S_SFP_GPIO: - hw->gpio_ctrl = 1; + wx->gpio_ctrl = 1; break; default: - hw->gpio_ctrl = 0; + wx->gpio_ctrl = 0; break; } } /** - * ngbe_init_rss_key - Initialize adapter RSS key - * @adapter: device handle + * ngbe_init_rss_key - Initialize wx RSS key + * @wx: device handle * * Allocates and initializes the RSS key if it is not allocated. **/ -static inline int ngbe_init_rss_key(struct ngbe_adapter *adapter) +static inline int ngbe_init_rss_key(struct wx *wx) { u32 *rss_key; - if (!adapter->rss_key) { - rss_key = kzalloc(NGBE_RSS_KEY_SIZE, GFP_KERNEL); + if (!wx->rss_key) { + rss_key = kzalloc(WX_RSS_KEY_SIZE, GFP_KERNEL); if (unlikely(!rss_key)) return -ENOMEM; - netdev_rss_key_fill(rss_key, NGBE_RSS_KEY_SIZE); - adapter->rss_key = rss_key; + netdev_rss_key_fill(rss_key, WX_RSS_KEY_SIZE); + wx->rss_key = rss_key; } return 0; @@ -142,71 +99,57 @@ static inline int ngbe_init_rss_key(struct ngbe_adapter *adapter) /** * ngbe_sw_init - Initialize general software structures - * @adapter: board private structure to initialize + * @wx: board private structure to initialize **/ -static int ngbe_sw_init(struct ngbe_adapter *adapter) +static int ngbe_sw_init(struct wx *wx) { - struct pci_dev *pdev = adapter->pdev; - struct ngbe_hw *hw = &adapter->hw; - struct wx_hw *wxhw = &hw->wxhw; + struct pci_dev *pdev = wx->pdev; u16 msix_count = 0; int err = 0; - wxhw->hw_addr = adapter->io_addr; - wxhw->pdev = pdev; + wx->mac.num_rar_entries = NGBE_RAR_ENTRIES; + wx->mac.max_rx_queues = NGBE_MAX_RX_QUEUES; + wx->mac.max_tx_queues = NGBE_MAX_TX_QUEUES; /* PCI config space info */ - err = wx_sw_init(wxhw); + err = wx_sw_init(wx); if (err < 0) { - netif_err(adapter, probe, adapter->netdev, - "Read of internal subsystem device id failed\n"); + wx_err(wx, "read of internal subsystem device id failed\n"); return err; } /* mac type, phy type , oem type */ - ngbe_init_type_code(hw); + ngbe_init_type_code(wx); - wxhw->mac.max_rx_queues = NGBE_MAX_RX_QUEUES; - wxhw->mac.max_tx_queues = NGBE_MAX_TX_QUEUES; - wxhw->mac.num_rar_entries = NGBE_RAR_ENTRIES; /* Set common capability flags and settings */ - adapter->max_q_vectors = NGBE_MAX_MSIX_VECTORS; - - err = wx_get_pcie_msix_counts(wxhw, &msix_count, NGBE_MAX_MSIX_VECTORS); + wx->max_q_vectors = NGBE_MAX_MSIX_VECTORS; + err = wx_get_pcie_msix_counts(wx, &msix_count, NGBE_MAX_MSIX_VECTORS); if (err) dev_err(&pdev->dev, "Do not support MSI-X\n"); - wxhw->mac.max_msix_vectors = msix_count; + wx->mac.max_msix_vectors = msix_count; - adapter->mac_table = kcalloc(wxhw->mac.num_rar_entries, - sizeof(struct ngbe_mac_addr), - GFP_KERNEL); - if (!adapter->mac_table) { - dev_err(&pdev->dev, "mac_table allocation failed: %d\n", err); - return -ENOMEM; - } - - if (ngbe_init_rss_key(adapter)) + if (ngbe_init_rss_key(wx)) return -ENOMEM; /* enable itr by default in dynamic mode */ - adapter->rx_itr_setting = 1; - adapter->tx_itr_setting = 1; + wx->rx_itr_setting = 1; + wx->tx_itr_setting = 1; /* set default ring sizes */ - adapter->tx_ring_count = NGBE_DEFAULT_TXD; - adapter->rx_ring_count = NGBE_DEFAULT_RXD; + wx->tx_ring_count = NGBE_DEFAULT_TXD; + wx->rx_ring_count = NGBE_DEFAULT_RXD; /* set default work limits */ - adapter->tx_work_limit = NGBE_DEFAULT_TX_WORK; - adapter->rx_work_limit = NGBE_DEFAULT_RX_WORK; + wx->tx_work_limit = NGBE_DEFAULT_TX_WORK; + wx->rx_work_limit = NGBE_DEFAULT_RX_WORK; return 0; } -static void ngbe_down(struct ngbe_adapter *adapter) +static void ngbe_down(struct wx *wx) { - netif_carrier_off(adapter->netdev); - netif_tx_disable(adapter->netdev); + netif_carrier_off(wx->netdev); + netif_tx_disable(wx->netdev); }; /** @@ -220,11 +163,9 @@ static void ngbe_down(struct ngbe_adapter *adapter) **/ static int ngbe_open(struct net_device *netdev) { - struct ngbe_adapter *adapter = netdev_priv(netdev); - struct ngbe_hw *hw = &adapter->hw; - struct wx_hw *wxhw = &hw->wxhw; + struct wx *wx = netdev_priv(netdev); - wx_control_hw(wxhw, true); + wx_control_hw(wx, true); return 0; } @@ -242,10 +183,10 @@ static int ngbe_open(struct net_device *netdev) **/ static int ngbe_close(struct net_device *netdev) { - struct ngbe_adapter *adapter = netdev_priv(netdev); + struct wx *wx = netdev_priv(netdev); - ngbe_down(adapter); - wx_control_hw(&adapter->hw.wxhw, false); + ngbe_down(wx); + wx_control_hw(wx, false); return 0; } @@ -256,52 +197,29 @@ static netdev_tx_t ngbe_xmit_frame(struct sk_buff *skb, return NETDEV_TX_OK; } -/** - * ngbe_set_mac - Change the Ethernet Address of the NIC - * @netdev: network interface device structure - * @p: pointer to an address structure - * - * Returns 0 on success, negative on failure - **/ -static int ngbe_set_mac(struct net_device *netdev, void *p) -{ - struct ngbe_adapter *adapter = netdev_priv(netdev); - struct wx_hw *wxhw = &adapter->hw.wxhw; - struct sockaddr *addr = p; - - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - - eth_hw_addr_set(netdev, addr->sa_data); - memcpy(wxhw->mac.addr, addr->sa_data, netdev->addr_len); - - ngbe_mac_set_default_filter(adapter, wxhw->mac.addr); - - return 0; -} - static void ngbe_dev_shutdown(struct pci_dev *pdev, bool *enable_wake) { - struct ngbe_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; + struct wx *wx = pci_get_drvdata(pdev); + struct net_device *netdev; + netdev = wx->netdev; netif_device_detach(netdev); rtnl_lock(); if (netif_running(netdev)) - ngbe_down(adapter); + ngbe_down(wx); rtnl_unlock(); - wx_control_hw(&adapter->hw.wxhw, false); + wx_control_hw(wx, false); pci_disable_device(pdev); } static void ngbe_shutdown(struct pci_dev *pdev) { - struct ngbe_adapter *adapter = pci_get_drvdata(pdev); + struct wx *wx = pci_get_drvdata(pdev); bool wake; - wake = !!adapter->wol; + wake = !!wx->wol; ngbe_dev_shutdown(pdev, &wake); @@ -316,7 +234,7 @@ static const struct net_device_ops ngbe_netdev_ops = { .ndo_stop = ngbe_close, .ndo_start_xmit = ngbe_xmit_frame, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = ngbe_set_mac, + .ndo_set_mac_address = wx_set_mac, }; /** @@ -326,18 +244,16 @@ static const struct net_device_ops ngbe_netdev_ops = { * * Returns 0 on success, negative on failure * - * ngbe_probe initializes an adapter identified by a pci_dev structure. - * The OS initialization, configuring of the adapter private structure, + * ngbe_probe initializes an wx identified by a pci_dev structure. + * The OS initialization, configuring of the wx private structure, * and a hardware reset occur. **/ static int ngbe_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) { - struct ngbe_adapter *adapter = NULL; - struct ngbe_hw *hw = NULL; - struct wx_hw *wxhw = NULL; struct net_device *netdev; u32 e2rom_cksum_cap = 0; + struct wx *wx = NULL; static int func_nums; u16 e2rom_ver = 0; u32 etrack_id = 0; @@ -368,7 +284,7 @@ static int ngbe_probe(struct pci_dev *pdev, pci_set_master(pdev); netdev = devm_alloc_etherdev_mqs(&pdev->dev, - sizeof(struct ngbe_adapter), + sizeof(struct wx), NGBE_MAX_TX_QUEUES, NGBE_MAX_RX_QUEUES); if (!netdev) { @@ -378,17 +294,15 @@ static int ngbe_probe(struct pci_dev *pdev, SET_NETDEV_DEV(netdev, &pdev->dev); - adapter = netdev_priv(netdev); - adapter->netdev = netdev; - adapter->pdev = pdev; - hw = &adapter->hw; - wxhw = &hw->wxhw; - adapter->msg_enable = BIT(3) - 1; - - adapter->io_addr = devm_ioremap(&pdev->dev, - pci_resource_start(pdev, 0), - pci_resource_len(pdev, 0)); - if (!adapter->io_addr) { + wx = netdev_priv(netdev); + wx->netdev = netdev; + wx->pdev = pdev; + wx->msg_enable = BIT(3) - 1; + + wx->hw_addr = devm_ioremap(&pdev->dev, + pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!wx->hw_addr) { err = -EIO; goto err_pci_release_regions; } @@ -397,44 +311,44 @@ static int ngbe_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_HIGHDMA; - adapter->bd_number = func_nums; + wx->bd_number = func_nums; /* setup the private structure */ - err = ngbe_sw_init(adapter); + err = ngbe_sw_init(wx); if (err) goto err_free_mac_table; /* check if flash load is done after hw power up */ - err = wx_check_flash_load(wxhw, NGBE_SPI_ILDR_STATUS_PERST); + err = wx_check_flash_load(wx, NGBE_SPI_ILDR_STATUS_PERST); if (err) goto err_free_mac_table; - err = wx_check_flash_load(wxhw, NGBE_SPI_ILDR_STATUS_PWRRST); + err = wx_check_flash_load(wx, NGBE_SPI_ILDR_STATUS_PWRRST); if (err) goto err_free_mac_table; - err = wx_mng_present(wxhw); + err = wx_mng_present(wx); if (err) { dev_err(&pdev->dev, "Management capability is not present\n"); goto err_free_mac_table; } - err = ngbe_reset_hw(hw); + err = ngbe_reset_hw(wx); if (err) { dev_err(&pdev->dev, "HW Init failed: %d\n", err); goto err_free_mac_table; } - if (wxhw->bus.func == 0) { - wr32(wxhw, NGBE_CALSUM_CAP_STATUS, 0x0); - wr32(wxhw, NGBE_EEPROM_VERSION_STORE_REG, 0x0); + if (wx->bus.func == 0) { + wr32(wx, NGBE_CALSUM_CAP_STATUS, 0x0); + wr32(wx, NGBE_EEPROM_VERSION_STORE_REG, 0x0); } else { - e2rom_cksum_cap = rd32(wxhw, NGBE_CALSUM_CAP_STATUS); - saved_ver = rd32(wxhw, NGBE_EEPROM_VERSION_STORE_REG); + e2rom_cksum_cap = rd32(wx, NGBE_CALSUM_CAP_STATUS); + saved_ver = rd32(wx, NGBE_EEPROM_VERSION_STORE_REG); } - wx_init_eeprom_params(wxhw); - if (wxhw->bus.func == 0 || e2rom_cksum_cap == 0) { + wx_init_eeprom_params(wx); + if (wx->bus.func == 0 || e2rom_cksum_cap == 0) { /* make sure the EEPROM is ready */ - err = ngbe_eeprom_chksum_hostif(hw); + err = ngbe_eeprom_chksum_hostif(wx); if (err) { dev_err(&pdev->dev, "The EEPROM Checksum Is Not Valid\n"); err = -EIO; @@ -442,14 +356,14 @@ static int ngbe_probe(struct pci_dev *pdev, } } - adapter->wol = 0; - if (hw->wol_enabled) - adapter->wol = NGBE_PSR_WKUP_CTL_MAG; + wx->wol = 0; + if (wx->wol_enabled) + wx->wol = NGBE_PSR_WKUP_CTL_MAG; - hw->wol_enabled = !!(adapter->wol); - wr32(wxhw, NGBE_PSR_WKUP_CTL, adapter->wol); + wx->wol_enabled = !!(wx->wol); + wr32(wx, NGBE_PSR_WKUP_CTL, wx->wol); - device_set_wakeup_enable(&pdev->dev, adapter->wol); + device_set_wakeup_enable(&pdev->dev, wx->wol); /* Save off EEPROM version number and Option Rom version which * together make a unique identify for the eeprom @@ -457,37 +371,37 @@ static int ngbe_probe(struct pci_dev *pdev, if (saved_ver) { etrack_id = saved_ver; } else { - wx_read_ee_hostif(wxhw, - wxhw->eeprom.sw_region_offset + NGBE_EEPROM_VERSION_H, + wx_read_ee_hostif(wx, + wx->eeprom.sw_region_offset + NGBE_EEPROM_VERSION_H, &e2rom_ver); etrack_id = e2rom_ver << 16; - wx_read_ee_hostif(wxhw, - wxhw->eeprom.sw_region_offset + NGBE_EEPROM_VERSION_L, + wx_read_ee_hostif(wx, + wx->eeprom.sw_region_offset + NGBE_EEPROM_VERSION_L, &e2rom_ver); etrack_id |= e2rom_ver; - wr32(wxhw, NGBE_EEPROM_VERSION_STORE_REG, etrack_id); + wr32(wx, NGBE_EEPROM_VERSION_STORE_REG, etrack_id); } - eth_hw_addr_set(netdev, wxhw->mac.perm_addr); - ngbe_mac_set_default_filter(adapter, wxhw->mac.perm_addr); + eth_hw_addr_set(netdev, wx->mac.perm_addr); + wx_mac_set_default_filter(wx, wx->mac.perm_addr); err = register_netdev(netdev); if (err) goto err_register; - pci_set_drvdata(pdev, adapter); + pci_set_drvdata(pdev, wx); - netif_info(adapter, probe, netdev, + netif_info(wx, probe, netdev, "PHY: %s, PBA No: Wang Xun GbE Family Controller\n", - hw->phy.type == ngbe_phy_internal ? "Internal" : "External"); - netif_info(adapter, probe, netdev, "%pM\n", netdev->dev_addr); + wx->mac_type == em_mac_type_mdi ? "Internal" : "External"); + netif_info(wx, probe, netdev, "%pM\n", netdev->dev_addr); return 0; err_register: - wx_control_hw(wxhw, false); + wx_control_hw(wx, false); err_free_mac_table: - kfree(adapter->mac_table); + kfree(wx->mac_table); err_pci_release_regions: pci_disable_pcie_error_reporting(pdev); pci_release_selected_regions(pdev, @@ -508,15 +422,15 @@ err_pci_disable_dev: **/ static void ngbe_remove(struct pci_dev *pdev) { - struct ngbe_adapter *adapter = pci_get_drvdata(pdev); + struct wx *wx = pci_get_drvdata(pdev); struct net_device *netdev; - netdev = adapter->netdev; + netdev = wx->netdev; unregister_netdev(netdev); pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); - kfree(adapter->mac_table); + kfree(wx->mac_table); pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h index 39f6c03f1a54..369d181930bc 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h @@ -90,50 +90,26 @@ #define NGBE_FW_CMD_ST_PASS 0x80658383 #define NGBE_FW_CMD_ST_FAIL 0x70657376 -enum ngbe_phy_type { - ngbe_phy_unknown = 0, - ngbe_phy_none, - ngbe_phy_internal, - ngbe_phy_m88e1512, - ngbe_phy_m88e1512_sfi, - ngbe_phy_m88e1512_unknown, - ngbe_phy_yt8521s, - ngbe_phy_yt8521s_sfi, - ngbe_phy_internal_yt8521s_sfi, - ngbe_phy_generic -}; - -enum ngbe_media_type { - ngbe_media_type_unknown = 0, - ngbe_media_type_fiber, - ngbe_media_type_copper, - ngbe_media_type_backplane, -}; - -enum ngbe_mac_type { - ngbe_mac_type_unknown = 0, - ngbe_mac_type_mdi, - ngbe_mac_type_rgmii -}; - -struct ngbe_phy_info { - enum ngbe_phy_type type; - enum ngbe_media_type media_type; - - u32 addr; - u32 id; - - bool reset_if_overtemp; - -}; - -struct ngbe_hw { - struct wx_hw wxhw; - struct ngbe_phy_info phy; - enum ngbe_mac_type mac_type; - - bool wol_enabled; - bool ncsi_enabled; - bool gpio_ctrl; -}; +#define NGBE_MAX_FDIR_INDICES 7 + +#define NGBE_MAX_RX_QUEUES (NGBE_MAX_FDIR_INDICES + 1) +#define NGBE_MAX_TX_QUEUES (NGBE_MAX_FDIR_INDICES + 1) + +#define NGBE_ETH_LENGTH_OF_ADDRESS 6 +#define NGBE_MAX_MSIX_VECTORS 0x09 +#define NGBE_RAR_ENTRIES 32 + +/* TX/RX descriptor defines */ +#define NGBE_DEFAULT_TXD 512 /* default ring size */ +#define NGBE_DEFAULT_TX_WORK 256 +#define NGBE_MAX_TXD 8192 +#define NGBE_MIN_TXD 128 + +#define NGBE_DEFAULT_RXD 512 /* default ring size */ +#define NGBE_DEFAULT_RX_WORK 256 +#define NGBE_MAX_RXD 8192 +#define NGBE_MIN_RXD 128 + +extern char ngbe_driver_name[]; + #endif /* _NGBE_TYPE_H_ */ diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe.h b/drivers/net/ethernet/wangxun/txgbe/txgbe.h deleted file mode 100644 index 19e61377bd00..000000000000 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2015 - 2022 Beijing WangXun Technology Co., Ltd. */ - -#ifndef _TXGBE_H_ -#define _TXGBE_H_ - -#define TXGBE_MAX_FDIR_INDICES 63 - -#define TXGBE_MAX_RX_QUEUES (TXGBE_MAX_FDIR_INDICES + 1) -#define TXGBE_MAX_TX_QUEUES (TXGBE_MAX_FDIR_INDICES + 1) - -#define TXGBE_SP_MAX_TX_QUEUES 128 -#define TXGBE_SP_MAX_RX_QUEUES 128 -#define TXGBE_SP_RAR_ENTRIES 128 -#define TXGBE_SP_MC_TBL_SIZE 128 - -struct txgbe_mac_addr { - u8 addr[ETH_ALEN]; - u16 state; /* bitmask */ - u64 pools; -}; - -#define TXGBE_MAC_STATE_DEFAULT 0x1 -#define TXGBE_MAC_STATE_MODIFIED 0x2 -#define TXGBE_MAC_STATE_IN_USE 0x4 - -/* board specific private data structure */ -struct txgbe_adapter { - u8 __iomem *io_addr; /* Mainly for iounmap use */ - /* OS defined structs */ - struct net_device *netdev; - struct pci_dev *pdev; - - /* structs defined in txgbe_type.h */ - struct txgbe_hw hw; - u16 msg_enable; - struct txgbe_mac_addr *mac_table; - char eeprom_id[32]; -}; - -extern char txgbe_driver_name[]; - -#endif /* _TXGBE_H_ */ diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c index 167f7ff73192..ebc46f3be056 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c @@ -12,70 +12,67 @@ #include "../libwx/wx_hw.h" #include "txgbe_type.h" #include "txgbe_hw.h" -#include "txgbe.h" /** * txgbe_init_thermal_sensor_thresh - Inits thermal sensor thresholds - * @hw: pointer to hardware structure + * @wx: pointer to hardware structure * * Inits the thermal sensor thresholds according to the NVM map * and save off the threshold and location values into mac.thermal_sensor_data **/ -static void txgbe_init_thermal_sensor_thresh(struct txgbe_hw *hw) +static void txgbe_init_thermal_sensor_thresh(struct wx *wx) { - struct wx_hw *wxhw = &hw->wxhw; - struct wx_thermal_sensor_data *data = &wxhw->mac.sensor; + struct wx_thermal_sensor_data *data = &wx->mac.sensor; memset(data, 0, sizeof(struct wx_thermal_sensor_data)); /* Only support thermal sensors attached to SP physical port 0 */ - if (wxhw->bus.func) + if (wx->bus.func) return; - wr32(wxhw, TXGBE_TS_CTL, TXGBE_TS_CTL_EVAL_MD); + wr32(wx, TXGBE_TS_CTL, TXGBE_TS_CTL_EVAL_MD); - wr32(wxhw, WX_TS_INT_EN, + wr32(wx, WX_TS_INT_EN, WX_TS_INT_EN_ALARM_INT_EN | WX_TS_INT_EN_DALARM_INT_EN); - wr32(wxhw, WX_TS_EN, WX_TS_EN_ENA); + wr32(wx, WX_TS_EN, WX_TS_EN_ENA); data->alarm_thresh = 100; - wr32(wxhw, WX_TS_ALARM_THRE, 677); + wr32(wx, WX_TS_ALARM_THRE, 677); data->dalarm_thresh = 90; - wr32(wxhw, WX_TS_DALARM_THRE, 614); + wr32(wx, WX_TS_DALARM_THRE, 614); } /** * txgbe_read_pba_string - Reads part number string from EEPROM - * @hw: pointer to hardware structure + * @wx: pointer to hardware structure * @pba_num: stores the part number string from the EEPROM * @pba_num_size: part number string buffer length * * Reads the part number string from the EEPROM. **/ -int txgbe_read_pba_string(struct txgbe_hw *hw, u8 *pba_num, u32 pba_num_size) +int txgbe_read_pba_string(struct wx *wx, u8 *pba_num, u32 pba_num_size) { u16 pba_ptr, offset, length, data; - struct wx_hw *wxhw = &hw->wxhw; int ret_val; if (!pba_num) { - wx_err(wxhw, "PBA string buffer was null\n"); + wx_err(wx, "PBA string buffer was null\n"); return -EINVAL; } - ret_val = wx_read_ee_hostif(wxhw, - wxhw->eeprom.sw_region_offset + TXGBE_PBANUM0_PTR, + ret_val = wx_read_ee_hostif(wx, + wx->eeprom.sw_region_offset + TXGBE_PBANUM0_PTR, &data); if (ret_val != 0) { - wx_err(wxhw, "NVM Read Error\n"); + wx_err(wx, "NVM Read Error\n"); return ret_val; } - ret_val = wx_read_ee_hostif(wxhw, - wxhw->eeprom.sw_region_offset + TXGBE_PBANUM1_PTR, + ret_val = wx_read_ee_hostif(wx, + wx->eeprom.sw_region_offset + TXGBE_PBANUM1_PTR, &pba_ptr); if (ret_val != 0) { - wx_err(wxhw, "NVM Read Error\n"); + wx_err(wx, "NVM Read Error\n"); return ret_val; } @@ -84,11 +81,11 @@ int txgbe_read_pba_string(struct txgbe_hw *hw, u8 *pba_num, u32 pba_num_size) * and we can decode it into an ascii string */ if (data != TXGBE_PBANUM_PTR_GUARD) { - wx_err(wxhw, "NVM PBA number is not stored as string\n"); + wx_err(wx, "NVM PBA number is not stored as string\n"); /* we will need 11 characters to store the PBA */ if (pba_num_size < 11) { - wx_err(wxhw, "PBA string buffer too small\n"); + wx_err(wx, "PBA string buffer too small\n"); return -ENOMEM; } @@ -118,20 +115,20 @@ int txgbe_read_pba_string(struct txgbe_hw *hw, u8 *pba_num, u32 pba_num_size) return 0; } - ret_val = wx_read_ee_hostif(wxhw, pba_ptr, &length); + ret_val = wx_read_ee_hostif(wx, pba_ptr, &length); if (ret_val != 0) { - wx_err(wxhw, "NVM Read Error\n"); + wx_err(wx, "NVM Read Error\n"); return ret_val; } if (length == 0xFFFF || length == 0) { - wx_err(wxhw, "NVM PBA number section invalid length\n"); + wx_err(wx, "NVM PBA number section invalid length\n"); return -EINVAL; } /* check if pba_num buffer is big enough */ if (pba_num_size < (((u32)length * 2) - 1)) { - wx_err(wxhw, "PBA string buffer too small\n"); + wx_err(wx, "PBA string buffer too small\n"); return -ENOMEM; } @@ -140,9 +137,9 @@ int txgbe_read_pba_string(struct txgbe_hw *hw, u8 *pba_num, u32 pba_num_size) length--; for (offset = 0; offset < length; offset++) { - ret_val = wx_read_ee_hostif(wxhw, pba_ptr + offset, &data); + ret_val = wx_read_ee_hostif(wx, pba_ptr + offset, &data); if (ret_val != 0) { - wx_err(wxhw, "NVM Read Error\n"); + wx_err(wx, "NVM Read Error\n"); return ret_val; } pba_num[offset * 2] = (u8)(data >> 8); @@ -155,14 +152,13 @@ int txgbe_read_pba_string(struct txgbe_hw *hw, u8 *pba_num, u32 pba_num_size) /** * txgbe_calc_eeprom_checksum - Calculates and returns the checksum - * @hw: pointer to hardware structure + * @wx: pointer to hardware structure * @checksum: pointer to cheksum * * Returns a negative error code on error **/ -static int txgbe_calc_eeprom_checksum(struct txgbe_hw *hw, u16 *checksum) +static int txgbe_calc_eeprom_checksum(struct wx *wx, u16 *checksum) { - struct wx_hw *wxhw = &hw->wxhw; u16 *eeprom_ptrs = NULL; u32 buffer_size = 0; u16 *buffer = NULL; @@ -170,7 +166,7 @@ static int txgbe_calc_eeprom_checksum(struct txgbe_hw *hw, u16 *checksum) int status; u16 i; - wx_init_eeprom_params(wxhw); + wx_init_eeprom_params(wx); if (!buffer) { eeprom_ptrs = kvmalloc_array(TXGBE_EEPROM_LAST_WORD, sizeof(u16), @@ -178,11 +174,11 @@ static int txgbe_calc_eeprom_checksum(struct txgbe_hw *hw, u16 *checksum) if (!eeprom_ptrs) return -ENOMEM; /* Read pointer area */ - status = wx_read_ee_hostif_buffer(wxhw, 0, + status = wx_read_ee_hostif_buffer(wx, 0, TXGBE_EEPROM_LAST_WORD, eeprom_ptrs); if (status != 0) { - wx_err(wxhw, "Failed to read EEPROM image\n"); + wx_err(wx, "Failed to read EEPROM image\n"); kvfree(eeprom_ptrs); return status; } @@ -194,7 +190,7 @@ static int txgbe_calc_eeprom_checksum(struct txgbe_hw *hw, u16 *checksum) } for (i = 0; i < TXGBE_EEPROM_LAST_WORD; i++) - if (i != wxhw->eeprom.sw_region_offset + TXGBE_EEPROM_CHECKSUM) + if (i != wx->eeprom.sw_region_offset + TXGBE_EEPROM_CHECKSUM) *checksum += local_buffer[i]; if (eeprom_ptrs) @@ -210,15 +206,14 @@ static int txgbe_calc_eeprom_checksum(struct txgbe_hw *hw, u16 *checksum) /** * txgbe_validate_eeprom_checksum - Validate EEPROM checksum - * @hw: pointer to hardware structure + * @wx: pointer to hardware structure * @checksum_val: calculated checksum * * Performs checksum calculation and validates the EEPROM checksum. If the * caller does not need checksum_val, the value can be NULL. **/ -int txgbe_validate_eeprom_checksum(struct txgbe_hw *hw, u16 *checksum_val) +int txgbe_validate_eeprom_checksum(struct wx *wx, u16 *checksum_val) { - struct wx_hw *wxhw = &hw->wxhw; u16 read_checksum = 0; u16 checksum; int status; @@ -227,18 +222,18 @@ int txgbe_validate_eeprom_checksum(struct txgbe_hw *hw, u16 *checksum_val) * not continue or we could be in for a very long wait while every * EEPROM read fails */ - status = wx_read_ee_hostif(wxhw, 0, &checksum); + status = wx_read_ee_hostif(wx, 0, &checksum); if (status) { - wx_err(wxhw, "EEPROM read failed\n"); + wx_err(wx, "EEPROM read failed\n"); return status; } checksum = 0; - status = txgbe_calc_eeprom_checksum(hw, &checksum); + status = txgbe_calc_eeprom_checksum(wx, &checksum); if (status != 0) return status; - status = wx_read_ee_hostif(wxhw, wxhw->eeprom.sw_region_offset + + status = wx_read_ee_hostif(wx, wx->eeprom.sw_region_offset + TXGBE_EEPROM_CHECKSUM, &read_checksum); if (status != 0) return status; @@ -248,7 +243,7 @@ int txgbe_validate_eeprom_checksum(struct txgbe_hw *hw, u16 *checksum_val) */ if (read_checksum != checksum) { status = -EIO; - wx_err(wxhw, "Invalid EEPROM checksum\n"); + wx_err(wx, "Invalid EEPROM checksum\n"); } /* If the user cares, return the calculated checksum */ @@ -258,55 +253,52 @@ int txgbe_validate_eeprom_checksum(struct txgbe_hw *hw, u16 *checksum_val) return status; } -static void txgbe_reset_misc(struct txgbe_hw *hw) +static void txgbe_reset_misc(struct wx *wx) { - struct wx_hw *wxhw = &hw->wxhw; - - wx_reset_misc(wxhw); - txgbe_init_thermal_sensor_thresh(hw); + wx_reset_misc(wx); + txgbe_init_thermal_sensor_thresh(wx); } /** * txgbe_reset_hw - Perform hardware reset - * @hw: pointer to hardware structure + * @wx: pointer to wx structure * * Resets the hardware by resetting the transmit and receive units, masks * and clears all interrupts, perform a PHY reset, and perform a link (MAC) * reset. **/ -int txgbe_reset_hw(struct txgbe_hw *hw) +int txgbe_reset_hw(struct wx *wx) { - struct wx_hw *wxhw = &hw->wxhw; int status; /* Call adapter stop to disable tx/rx and clear interrupts */ - status = wx_stop_adapter(wxhw); + status = wx_stop_adapter(wx); if (status != 0) return status; - if (!(((wxhw->subsystem_device_id & WX_NCSI_MASK) == WX_NCSI_SUP) || - ((wxhw->subsystem_device_id & WX_WOL_MASK) == WX_WOL_SUP))) - wx_reset_hostif(wxhw); + if (!(((wx->subsystem_device_id & WX_NCSI_MASK) == WX_NCSI_SUP) || + ((wx->subsystem_device_id & WX_WOL_MASK) == WX_WOL_SUP))) + wx_reset_hostif(wx); usleep_range(10, 100); - status = wx_check_flash_load(wxhw, TXGBE_SPI_ILDR_STATUS_LAN_SW_RST(wxhw->bus.func)); + status = wx_check_flash_load(wx, TXGBE_SPI_ILDR_STATUS_LAN_SW_RST(wx->bus.func)); if (status != 0) return status; - txgbe_reset_misc(hw); + txgbe_reset_misc(wx); /* Store the permanent mac address */ - wx_get_mac_addr(wxhw, wxhw->mac.perm_addr); + wx_get_mac_addr(wx, wx->mac.perm_addr); /* Store MAC address from RAR0, clear receive address registers, and * clear the multicast table. Also reset num_rar_entries to 128, * since we modify this value when programming the SAN MAC address. */ - wxhw->mac.num_rar_entries = TXGBE_SP_RAR_ENTRIES; - wx_init_rx_addrs(wxhw); + wx->mac.num_rar_entries = TXGBE_SP_RAR_ENTRIES; + wx_init_rx_addrs(wx); - pci_set_master(wxhw->pdev); + pci_set_master(wx->pdev); return 0; } diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.h index 6a751a69177b..e82f65dff8a6 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.h @@ -4,8 +4,8 @@ #ifndef _TXGBE_HW_H_ #define _TXGBE_HW_H_ -int txgbe_read_pba_string(struct txgbe_hw *hw, u8 *pba_num, u32 pba_num_size); -int txgbe_validate_eeprom_checksum(struct txgbe_hw *hw, u16 *checksum_val); -int txgbe_reset_hw(struct txgbe_hw *hw); +int txgbe_read_pba_string(struct wx *wx, u8 *pba_num, u32 pba_num_size); +int txgbe_validate_eeprom_checksum(struct wx *wx, u16 *checksum_val); +int txgbe_reset_hw(struct wx *wx); #endif /* _TXGBE_HW_H_ */ diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 36780e7f05b7..aa4d09df3b01 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -14,7 +14,6 @@ #include "../libwx/wx_hw.h" #include "txgbe_type.h" #include "txgbe_hw.h" -#include "txgbe.h" char txgbe_driver_name[] = "txgbe"; @@ -35,26 +34,26 @@ static const struct pci_device_id txgbe_pci_tbl[] = { #define DEFAULT_DEBUG_LEVEL_SHIFT 3 -static void txgbe_check_minimum_link(struct txgbe_adapter *adapter) +static void txgbe_check_minimum_link(struct wx *wx) { struct pci_dev *pdev; - pdev = adapter->pdev; + pdev = wx->pdev; pcie_print_link_status(pdev); } /** * txgbe_enumerate_functions - Get the number of ports this device has - * @adapter: adapter structure + * @wx: wx structure * * This function enumerates the phsyical functions co-located on a single slot, * in order to determine how many ports a device has. This is most useful in * determining the required GT/s of PCIe bandwidth necessary for optimal * performance. **/ -static int txgbe_enumerate_functions(struct txgbe_adapter *adapter) +static int txgbe_enumerate_functions(struct wx *wx) { - struct pci_dev *entry, *pdev = adapter->pdev; + struct pci_dev *entry, *pdev = wx->pdev; int physfns = 0; list_for_each_entry(entry, &pdev->bus->devices, bus_list) { @@ -73,197 +72,90 @@ static int txgbe_enumerate_functions(struct txgbe_adapter *adapter) return physfns; } -static void txgbe_sync_mac_table(struct txgbe_adapter *adapter) +static void txgbe_up_complete(struct wx *wx) { - struct txgbe_hw *hw = &adapter->hw; - struct wx_hw *wxhw = &hw->wxhw; - int i; - - for (i = 0; i < wxhw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & TXGBE_MAC_STATE_MODIFIED) { - if (adapter->mac_table[i].state & TXGBE_MAC_STATE_IN_USE) { - wx_set_rar(wxhw, i, - adapter->mac_table[i].addr, - adapter->mac_table[i].pools, - WX_PSR_MAC_SWC_AD_H_AV); - } else { - wx_clear_rar(wxhw, i); - } - adapter->mac_table[i].state &= ~(TXGBE_MAC_STATE_MODIFIED); - } - } -} - -/* this function destroys the first RAR entry */ -static void txgbe_mac_set_default_filter(struct txgbe_adapter *adapter, - u8 *addr) -{ - struct wx_hw *wxhw = &adapter->hw.wxhw; - - memcpy(&adapter->mac_table[0].addr, addr, ETH_ALEN); - adapter->mac_table[0].pools = 1ULL; - adapter->mac_table[0].state = (TXGBE_MAC_STATE_DEFAULT | - TXGBE_MAC_STATE_IN_USE); - wx_set_rar(wxhw, 0, adapter->mac_table[0].addr, - adapter->mac_table[0].pools, - WX_PSR_MAC_SWC_AD_H_AV); -} - -static void txgbe_flush_sw_mac_table(struct txgbe_adapter *adapter) -{ - struct wx_hw *wxhw = &adapter->hw.wxhw; - u32 i; - - for (i = 0; i < wxhw->mac.num_rar_entries; i++) { - adapter->mac_table[i].state |= TXGBE_MAC_STATE_MODIFIED; - adapter->mac_table[i].state &= ~TXGBE_MAC_STATE_IN_USE; - memset(adapter->mac_table[i].addr, 0, ETH_ALEN); - adapter->mac_table[i].pools = 0; - } - txgbe_sync_mac_table(adapter); -} - -static int txgbe_del_mac_filter(struct txgbe_adapter *adapter, u8 *addr, u16 pool) -{ - struct wx_hw *wxhw = &adapter->hw.wxhw; - u32 i; - - if (is_zero_ether_addr(addr)) - return -EINVAL; - - /* search table for addr, if found, set to 0 and sync */ - for (i = 0; i < wxhw->mac.num_rar_entries; i++) { - if (ether_addr_equal(addr, adapter->mac_table[i].addr)) { - if (adapter->mac_table[i].pools & (1ULL << pool)) { - adapter->mac_table[i].state |= TXGBE_MAC_STATE_MODIFIED; - adapter->mac_table[i].state &= ~TXGBE_MAC_STATE_IN_USE; - adapter->mac_table[i].pools &= ~(1ULL << pool); - txgbe_sync_mac_table(adapter); - } - return 0; - } - - if (adapter->mac_table[i].pools != (1 << pool)) - continue; - if (!ether_addr_equal(addr, adapter->mac_table[i].addr)) - continue; - - adapter->mac_table[i].state |= TXGBE_MAC_STATE_MODIFIED; - adapter->mac_table[i].state &= ~TXGBE_MAC_STATE_IN_USE; - memset(adapter->mac_table[i].addr, 0, ETH_ALEN); - adapter->mac_table[i].pools = 0; - txgbe_sync_mac_table(adapter); - return 0; - } - return -ENOMEM; -} - -static void txgbe_up_complete(struct txgbe_adapter *adapter) -{ - struct txgbe_hw *hw = &adapter->hw; - struct wx_hw *wxhw = &hw->wxhw; - - wx_control_hw(wxhw, true); + wx_control_hw(wx, true); } -static void txgbe_reset(struct txgbe_adapter *adapter) +static void txgbe_reset(struct wx *wx) { - struct net_device *netdev = adapter->netdev; - struct txgbe_hw *hw = &adapter->hw; + struct net_device *netdev = wx->netdev; u8 old_addr[ETH_ALEN]; int err; - err = txgbe_reset_hw(hw); + err = txgbe_reset_hw(wx); if (err != 0) - dev_err(&adapter->pdev->dev, "Hardware Error: %d\n", err); + wx_err(wx, "Hardware Error: %d\n", err); /* do not flush user set addresses */ - memcpy(old_addr, &adapter->mac_table[0].addr, netdev->addr_len); - txgbe_flush_sw_mac_table(adapter); - txgbe_mac_set_default_filter(adapter, old_addr); + memcpy(old_addr, &wx->mac_table[0].addr, netdev->addr_len); + wx_flush_sw_mac_table(wx); + wx_mac_set_default_filter(wx, old_addr); } -static void txgbe_disable_device(struct txgbe_adapter *adapter) +static void txgbe_disable_device(struct wx *wx) { - struct net_device *netdev = adapter->netdev; - struct wx_hw *wxhw = &adapter->hw.wxhw; + struct net_device *netdev = wx->netdev; - wx_disable_pcie_master(wxhw); + wx_disable_pcie_master(wx); /* disable receives */ - wx_disable_rx(wxhw); + wx_disable_rx(wx); netif_carrier_off(netdev); netif_tx_disable(netdev); - if (wxhw->bus.func < 2) - wr32m(wxhw, TXGBE_MIS_PRB_CTL, TXGBE_MIS_PRB_CTL_LAN_UP(wxhw->bus.func), 0); + if (wx->bus.func < 2) + wr32m(wx, TXGBE_MIS_PRB_CTL, TXGBE_MIS_PRB_CTL_LAN_UP(wx->bus.func), 0); else - dev_err(&adapter->pdev->dev, - "%s: invalid bus lan id %d\n", - __func__, wxhw->bus.func); + wx_err(wx, "%s: invalid bus lan id %d\n", + __func__, wx->bus.func); - if (!(((wxhw->subsystem_device_id & WX_NCSI_MASK) == WX_NCSI_SUP) || - ((wxhw->subsystem_device_id & WX_WOL_MASK) == WX_WOL_SUP))) { + if (!(((wx->subsystem_device_id & WX_NCSI_MASK) == WX_NCSI_SUP) || + ((wx->subsystem_device_id & WX_WOL_MASK) == WX_WOL_SUP))) { /* disable mac transmiter */ - wr32m(wxhw, WX_MAC_TX_CFG, WX_MAC_TX_CFG_TE, 0); + wr32m(wx, WX_MAC_TX_CFG, WX_MAC_TX_CFG_TE, 0); } /* Disable the Tx DMA engine */ - wr32m(wxhw, WX_TDM_CTL, WX_TDM_CTL_TE, 0); + wr32m(wx, WX_TDM_CTL, WX_TDM_CTL_TE, 0); } -static void txgbe_down(struct txgbe_adapter *adapter) +static void txgbe_down(struct wx *wx) { - txgbe_disable_device(adapter); - txgbe_reset(adapter); + txgbe_disable_device(wx); + txgbe_reset(wx); } /** - * txgbe_sw_init - Initialize general software structures (struct txgbe_adapter) - * @adapter: board private structure to initialize + * txgbe_sw_init - Initialize general software structures (struct wx) + * @wx: board private structure to initialize **/ -static int txgbe_sw_init(struct txgbe_adapter *adapter) +static int txgbe_sw_init(struct wx *wx) { - struct pci_dev *pdev = adapter->pdev; - struct txgbe_hw *hw = &adapter->hw; - struct wx_hw *wxhw = &hw->wxhw; int err; - wxhw->hw_addr = adapter->io_addr; - wxhw->pdev = pdev; + wx->mac.num_rar_entries = TXGBE_SP_RAR_ENTRIES; + wx->mac.max_tx_queues = TXGBE_SP_MAX_TX_QUEUES; + wx->mac.max_rx_queues = TXGBE_SP_MAX_RX_QUEUES; + wx->mac.mcft_size = TXGBE_SP_MC_TBL_SIZE; /* PCI config space info */ - err = wx_sw_init(wxhw); + err = wx_sw_init(wx); if (err < 0) { - netif_err(adapter, probe, adapter->netdev, - "read of internal subsystem device id failed\n"); + wx_err(wx, "read of internal subsystem device id failed\n"); return err; } - switch (wxhw->device_id) { + switch (wx->device_id) { case TXGBE_DEV_ID_SP1000: case TXGBE_DEV_ID_WX1820: - wxhw->mac.type = wx_mac_sp; + wx->mac.type = wx_mac_sp; break; default: - wxhw->mac.type = wx_mac_unknown; + wx->mac.type = wx_mac_unknown; break; } - wxhw->mac.num_rar_entries = TXGBE_SP_RAR_ENTRIES; - wxhw->mac.max_tx_queues = TXGBE_SP_MAX_TX_QUEUES; - wxhw->mac.max_rx_queues = TXGBE_SP_MAX_RX_QUEUES; - wxhw->mac.mcft_size = TXGBE_SP_MC_TBL_SIZE; - - adapter->mac_table = kcalloc(wxhw->mac.num_rar_entries, - sizeof(struct txgbe_mac_addr), - GFP_KERNEL); - if (!adapter->mac_table) { - netif_err(adapter, probe, adapter->netdev, - "mac_table allocation failed\n"); - return -ENOMEM; - } - return 0; } @@ -278,23 +170,23 @@ static int txgbe_sw_init(struct txgbe_adapter *adapter) **/ static int txgbe_open(struct net_device *netdev) { - struct txgbe_adapter *adapter = netdev_priv(netdev); + struct wx *wx = netdev_priv(netdev); - txgbe_up_complete(adapter); + txgbe_up_complete(wx); return 0; } /** * txgbe_close_suspend - actions necessary to both suspend and close flows - * @adapter: the private adapter struct + * @wx: the private wx struct * * This function should contain the necessary work common to both suspending * and closing of the device. */ -static void txgbe_close_suspend(struct txgbe_adapter *adapter) +static void txgbe_close_suspend(struct wx *wx) { - txgbe_disable_device(adapter); + txgbe_disable_device(wx); } /** @@ -310,29 +202,28 @@ static void txgbe_close_suspend(struct txgbe_adapter *adapter) **/ static int txgbe_close(struct net_device *netdev) { - struct txgbe_adapter *adapter = netdev_priv(netdev); + struct wx *wx = netdev_priv(netdev); - txgbe_down(adapter); - wx_control_hw(&adapter->hw.wxhw, false); + txgbe_down(wx); + wx_control_hw(wx, false); return 0; } static void txgbe_dev_shutdown(struct pci_dev *pdev, bool *enable_wake) { - struct txgbe_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; - struct txgbe_hw *hw = &adapter->hw; - struct wx_hw *wxhw = &hw->wxhw; + struct wx *wx = pci_get_drvdata(pdev); + struct net_device *netdev; + netdev = wx->netdev; netif_device_detach(netdev); rtnl_lock(); if (netif_running(netdev)) - txgbe_close_suspend(adapter); + txgbe_close_suspend(wx); rtnl_unlock(); - wx_control_hw(wxhw, false); + wx_control_hw(wx, false); pci_disable_device(pdev); } @@ -355,39 +246,12 @@ static netdev_tx_t txgbe_xmit_frame(struct sk_buff *skb, return NETDEV_TX_OK; } -/** - * txgbe_set_mac - Change the Ethernet Address of the NIC - * @netdev: network interface device structure - * @p: pointer to an address structure - * - * Returns 0 on success, negative on failure - **/ -static int txgbe_set_mac(struct net_device *netdev, void *p) -{ - struct txgbe_adapter *adapter = netdev_priv(netdev); - struct wx_hw *wxhw = &adapter->hw.wxhw; - struct sockaddr *addr = p; - int retval; - - retval = eth_prepare_mac_addr_change(netdev, addr); - if (retval) - return retval; - - txgbe_del_mac_filter(adapter, wxhw->mac.addr, 0); - eth_hw_addr_set(netdev, addr->sa_data); - memcpy(wxhw->mac.addr, addr->sa_data, netdev->addr_len); - - txgbe_mac_set_default_filter(adapter, wxhw->mac.addr); - - return 0; -} - static const struct net_device_ops txgbe_netdev_ops = { .ndo_open = txgbe_open, .ndo_stop = txgbe_close, .ndo_start_xmit = txgbe_xmit_frame, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = txgbe_set_mac, + .ndo_set_mac_address = wx_set_mac, }; /** @@ -398,17 +262,15 @@ static const struct net_device_ops txgbe_netdev_ops = { * Returns 0 on success, negative on failure * * txgbe_probe initializes an adapter identified by a pci_dev structure. - * The OS initialization, configuring of the adapter private structure, + * The OS initialization, configuring of the wx private structure, * and a hardware reset occur. **/ static int txgbe_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) { - struct txgbe_adapter *adapter = NULL; - struct txgbe_hw *hw = NULL; - struct wx_hw *wxhw = NULL; struct net_device *netdev; int err, expected_gts; + struct wx *wx = NULL; u16 eeprom_verh = 0, eeprom_verl = 0, offset = 0; u16 eeprom_cfg_blkh = 0, eeprom_cfg_blkl = 0; @@ -440,7 +302,7 @@ static int txgbe_probe(struct pci_dev *pdev, pci_set_master(pdev); netdev = devm_alloc_etherdev_mqs(&pdev->dev, - sizeof(struct txgbe_adapter), + sizeof(struct wx), TXGBE_MAX_TX_QUEUES, TXGBE_MAX_RX_QUEUES); if (!netdev) { @@ -450,17 +312,16 @@ static int txgbe_probe(struct pci_dev *pdev, SET_NETDEV_DEV(netdev, &pdev->dev); - adapter = netdev_priv(netdev); - adapter->netdev = netdev; - adapter->pdev = pdev; - hw = &adapter->hw; - wxhw = &hw->wxhw; - adapter->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1; - - adapter->io_addr = devm_ioremap(&pdev->dev, - pci_resource_start(pdev, 0), - pci_resource_len(pdev, 0)); - if (!adapter->io_addr) { + wx = netdev_priv(netdev); + wx->netdev = netdev; + wx->pdev = pdev; + + wx->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1; + + wx->hw_addr = devm_ioremap(&pdev->dev, + pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!wx->hw_addr) { err = -EIO; goto err_pci_release_regions; } @@ -468,25 +329,25 @@ static int txgbe_probe(struct pci_dev *pdev, netdev->netdev_ops = &txgbe_netdev_ops; /* setup the private structure */ - err = txgbe_sw_init(adapter); + err = txgbe_sw_init(wx); if (err) goto err_free_mac_table; /* check if flash load is done after hw power up */ - err = wx_check_flash_load(wxhw, TXGBE_SPI_ILDR_STATUS_PERST); + err = wx_check_flash_load(wx, TXGBE_SPI_ILDR_STATUS_PERST); if (err) goto err_free_mac_table; - err = wx_check_flash_load(wxhw, TXGBE_SPI_ILDR_STATUS_PWRRST); + err = wx_check_flash_load(wx, TXGBE_SPI_ILDR_STATUS_PWRRST); if (err) goto err_free_mac_table; - err = wx_mng_present(wxhw); + err = wx_mng_present(wx); if (err) { dev_err(&pdev->dev, "Management capability is not present\n"); goto err_free_mac_table; } - err = txgbe_reset_hw(hw); + err = txgbe_reset_hw(wx); if (err) { dev_err(&pdev->dev, "HW Init failed: %d\n", err); goto err_free_mac_table; @@ -495,36 +356,36 @@ static int txgbe_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_HIGHDMA; /* make sure the EEPROM is good */ - err = txgbe_validate_eeprom_checksum(hw, NULL); + err = txgbe_validate_eeprom_checksum(wx, NULL); if (err != 0) { dev_err(&pdev->dev, "The EEPROM Checksum Is Not Valid\n"); - wr32(wxhw, WX_MIS_RST, WX_MIS_RST_SW_RST); + wr32(wx, WX_MIS_RST, WX_MIS_RST_SW_RST); err = -EIO; goto err_free_mac_table; } - eth_hw_addr_set(netdev, wxhw->mac.perm_addr); - txgbe_mac_set_default_filter(adapter, wxhw->mac.perm_addr); + eth_hw_addr_set(netdev, wx->mac.perm_addr); + wx_mac_set_default_filter(wx, wx->mac.perm_addr); /* Save off EEPROM version number and Option Rom version which * together make a unique identify for the eeprom */ - wx_read_ee_hostif(wxhw, - wxhw->eeprom.sw_region_offset + TXGBE_EEPROM_VERSION_H, + wx_read_ee_hostif(wx, + wx->eeprom.sw_region_offset + TXGBE_EEPROM_VERSION_H, &eeprom_verh); - wx_read_ee_hostif(wxhw, - wxhw->eeprom.sw_region_offset + TXGBE_EEPROM_VERSION_L, + wx_read_ee_hostif(wx, + wx->eeprom.sw_region_offset + TXGBE_EEPROM_VERSION_L, &eeprom_verl); etrack_id = (eeprom_verh << 16) | eeprom_verl; - wx_read_ee_hostif(wxhw, - wxhw->eeprom.sw_region_offset + TXGBE_ISCSI_BOOT_CONFIG, + wx_read_ee_hostif(wx, + wx->eeprom.sw_region_offset + TXGBE_ISCSI_BOOT_CONFIG, &offset); /* Make sure offset to SCSI block is valid */ if (!(offset == 0x0) && !(offset == 0xffff)) { - wx_read_ee_hostif(wxhw, offset + 0x84, &eeprom_cfg_blkh); - wx_read_ee_hostif(wxhw, offset + 0x83, &eeprom_cfg_blkl); + wx_read_ee_hostif(wx, offset + 0x84, &eeprom_cfg_blkh); + wx_read_ee_hostif(wx, offset + 0x83, &eeprom_cfg_blkl); /* Only display Option Rom if exist */ if (eeprom_cfg_blkl && eeprom_cfg_blkh) { @@ -532,15 +393,15 @@ static int txgbe_probe(struct pci_dev *pdev, build = (eeprom_cfg_blkl << 8) | (eeprom_cfg_blkh >> 8); patch = eeprom_cfg_blkh & 0x00ff; - snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), + snprintf(wx->eeprom_id, sizeof(wx->eeprom_id), "0x%08x, %d.%d.%d", etrack_id, major, build, patch); } else { - snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), + snprintf(wx->eeprom_id, sizeof(wx->eeprom_id), "0x%08x", etrack_id); } } else { - snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), + snprintf(wx->eeprom_id, sizeof(wx->eeprom_id), "0x%08x", etrack_id); } @@ -548,7 +409,7 @@ static int txgbe_probe(struct pci_dev *pdev, if (err) goto err_release_hw; - pci_set_drvdata(pdev, adapter); + pci_set_drvdata(pdev, wx); /* calculate the expected PCIe bandwidth required for optimal * performance. Note that some older parts will never have enough @@ -556,27 +417,27 @@ static int txgbe_probe(struct pci_dev *pdev, * parts to ensure that no warning is displayed, as this could confuse * users otherwise. */ - expected_gts = txgbe_enumerate_functions(adapter) * 10; + expected_gts = txgbe_enumerate_functions(wx) * 10; /* don't check link if we failed to enumerate functions */ if (expected_gts > 0) - txgbe_check_minimum_link(adapter); + txgbe_check_minimum_link(wx); else dev_warn(&pdev->dev, "Failed to enumerate PF devices.\n"); /* First try to read PBA as a string */ - err = txgbe_read_pba_string(hw, part_str, TXGBE_PBANUM_LENGTH); + err = txgbe_read_pba_string(wx, part_str, TXGBE_PBANUM_LENGTH); if (err) strncpy(part_str, "Unknown", TXGBE_PBANUM_LENGTH); - netif_info(adapter, probe, netdev, "%pM\n", netdev->dev_addr); + netif_info(wx, probe, netdev, "%pM\n", netdev->dev_addr); return 0; err_release_hw: - wx_control_hw(wxhw, false); + wx_control_hw(wx, false); err_free_mac_table: - kfree(adapter->mac_table); + kfree(wx->mac_table); err_pci_release_regions: pci_disable_pcie_error_reporting(pdev); pci_release_selected_regions(pdev, @@ -597,16 +458,16 @@ err_pci_disable_dev: **/ static void txgbe_remove(struct pci_dev *pdev) { - struct txgbe_adapter *adapter = pci_get_drvdata(pdev); + struct wx *wx = pci_get_drvdata(pdev); struct net_device *netdev; - netdev = adapter->netdev; + netdev = wx->netdev; unregister_netdev(netdev); pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); - kfree(adapter->mac_table); + kfree(wx->mac_table); pci_disable_pcie_error_reporting(pdev); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 740a1c447e20..cbd705a9f4bd 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -67,8 +67,16 @@ #define TXGBE_PBANUM1_PTR 0x06 #define TXGBE_PBANUM_PTR_GUARD 0xFAFA -struct txgbe_hw { - struct wx_hw wxhw; -}; +#define TXGBE_MAX_FDIR_INDICES 63 + +#define TXGBE_MAX_RX_QUEUES (TXGBE_MAX_FDIR_INDICES + 1) +#define TXGBE_MAX_TX_QUEUES (TXGBE_MAX_FDIR_INDICES + 1) + +#define TXGBE_SP_MAX_TX_QUEUES 128 +#define TXGBE_SP_MAX_RX_QUEUES 128 +#define TXGBE_SP_RAR_ENTRIES 128 +#define TXGBE_SP_MC_TBL_SIZE 128 + +extern char txgbe_driver_name[]; #endif /* _TXGBE_TYPE_H_ */ diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index bd3b0c2655a2..83ff882f5d97 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -623,16 +623,10 @@ static int receive(struct net_device *dev, int cnt) /* --------------------------------------------------------------------- */ -#if defined(__i386__) && !defined(CONFIG_UML) -#include <asm/msr.h> #define GETTICK(x) \ ({ \ - if (boot_cpu_has(X86_FEATURE_TSC)) \ - x = (unsigned int)rdtsc(); \ + x = (unsigned int)get_cycles(); \ }) -#else /* __i386__ && !CONFIG_UML */ -#define GETTICK(x) -#endif /* __i386__ && !CONFIG_UML */ static void epp_bh(struct work_struct *work) { diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index d458a35839cc..fd982cec8068 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -26,6 +26,8 @@ #include "ipa.h" #include "ipa_reg.h" #include "ipa_endpoint.h" +#include "ipa_power.h" +#include "ipa_uc.h" #include "ipa_interrupt.h" /** @@ -33,47 +35,47 @@ * @ipa: IPA pointer * @irq: Linux IRQ number used for IPA interrupts * @enabled: Mask indicating which interrupts are enabled - * @handler: Array of handlers indexed by IPA interrupt ID */ struct ipa_interrupt { struct ipa *ipa; u32 irq; u32 enabled; - ipa_irq_handler_t handler[IPA_IRQ_COUNT]; }; -/* Returns true if the interrupt type is associated with the microcontroller */ -static bool ipa_interrupt_uc(struct ipa_interrupt *interrupt, u32 irq_id) -{ - return irq_id == IPA_IRQ_UC_0 || irq_id == IPA_IRQ_UC_1; -} - /* Process a particular interrupt type that has been received */ static void ipa_interrupt_process(struct ipa_interrupt *interrupt, u32 irq_id) { - bool uc_irq = ipa_interrupt_uc(interrupt, irq_id); struct ipa *ipa = interrupt->ipa; const struct ipa_reg *reg; u32 mask = BIT(irq_id); u32 offset; - /* For microcontroller interrupts, clear the interrupt right away, - * "to avoid clearing unhandled interrupts." - */ reg = ipa_reg(ipa, IPA_IRQ_CLR); offset = ipa_reg_offset(reg); - if (uc_irq) - iowrite32(mask, ipa->reg_virt + offset); - - if (irq_id < IPA_IRQ_COUNT && interrupt->handler[irq_id]) - interrupt->handler[irq_id](interrupt->ipa, irq_id); - /* Clearing the SUSPEND_TX interrupt also clears the register - * that tells us which suspended endpoint(s) caused the interrupt, - * so defer clearing until after the handler has been called. - */ - if (!uc_irq) + switch (irq_id) { + case IPA_IRQ_UC_0: + case IPA_IRQ_UC_1: + /* For microcontroller interrupts, clear the interrupt right + * away, "to avoid clearing unhandled interrupts." + */ + iowrite32(mask, ipa->reg_virt + offset); + ipa_uc_interrupt_handler(ipa, irq_id); + break; + + case IPA_IRQ_TX_SUSPEND: + /* Clearing the SUSPEND_TX interrupt also clears the + * register that tells us which suspended endpoint(s) + * caused the interrupt, so defer clearing until after + * the handler has been called. + */ + ipa_power_suspend_handler(ipa, irq_id); + fallthrough; + + default: /* Silently ignore (and clear) any other condition */ iowrite32(mask, ipa->reg_virt + offset); + break; + } } /* IPA IRQ handler is threaded */ @@ -127,6 +129,29 @@ out_power_put: return IRQ_HANDLED; } +static void ipa_interrupt_enabled_update(struct ipa *ipa) +{ + const struct ipa_reg *reg = ipa_reg(ipa, IPA_IRQ_EN); + + iowrite32(ipa->interrupt->enabled, ipa->reg_virt + ipa_reg_offset(reg)); +} + +/* Enable an IPA interrupt type */ +void ipa_interrupt_enable(struct ipa *ipa, enum ipa_irq_id ipa_irq) +{ + /* Update the IPA interrupt mask to enable it */ + ipa->interrupt->enabled |= BIT(ipa_irq); + ipa_interrupt_enabled_update(ipa); +} + +/* Disable an IPA interrupt type */ +void ipa_interrupt_disable(struct ipa *ipa, enum ipa_irq_id ipa_irq) +{ + /* Update the IPA interrupt mask to disable it */ + ipa->interrupt->enabled &= ~BIT(ipa_irq); + ipa_interrupt_enabled_update(ipa); +} + /* Common function used to enable/disable TX_SUSPEND for an endpoint */ static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt, u32 endpoint_id, bool enable) @@ -200,44 +225,6 @@ void ipa_interrupt_simulate_suspend(struct ipa_interrupt *interrupt) ipa_interrupt_process(interrupt, IPA_IRQ_TX_SUSPEND); } -/* Add a handler for an IPA interrupt */ -void ipa_interrupt_add(struct ipa_interrupt *interrupt, - enum ipa_irq_id ipa_irq, ipa_irq_handler_t handler) -{ - struct ipa *ipa = interrupt->ipa; - const struct ipa_reg *reg; - - if (WARN_ON(ipa_irq >= IPA_IRQ_COUNT)) - return; - - interrupt->handler[ipa_irq] = handler; - - /* Update the IPA interrupt mask to enable it */ - interrupt->enabled |= BIT(ipa_irq); - - reg = ipa_reg(ipa, IPA_IRQ_EN); - iowrite32(interrupt->enabled, ipa->reg_virt + ipa_reg_offset(reg)); -} - -/* Remove the handler for an IPA interrupt type */ -void -ipa_interrupt_remove(struct ipa_interrupt *interrupt, enum ipa_irq_id ipa_irq) -{ - struct ipa *ipa = interrupt->ipa; - const struct ipa_reg *reg; - - if (WARN_ON(ipa_irq >= IPA_IRQ_COUNT)) - return; - - /* Update the IPA interrupt mask to disable it */ - interrupt->enabled &= ~BIT(ipa_irq); - - reg = ipa_reg(ipa, IPA_IRQ_EN); - iowrite32(interrupt->enabled, ipa->reg_virt + ipa_reg_offset(reg)); - - interrupt->handler[ipa_irq] = NULL; -} - /* Configure the IPA interrupt framework */ struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa) { diff --git a/drivers/net/ipa/ipa_interrupt.h b/drivers/net/ipa/ipa_interrupt.h index f31fd9965fdc..764a65e6b503 100644 --- a/drivers/net/ipa/ipa_interrupt.h +++ b/drivers/net/ipa/ipa_interrupt.h @@ -11,39 +11,7 @@ struct ipa; struct ipa_interrupt; - -/** - * typedef ipa_irq_handler_t - IPA interrupt handler function type - * @ipa: IPA pointer - * @irq_id: interrupt type - * - * Callback function registered by ipa_interrupt_add() to handle a specific - * IPA interrupt type - */ -typedef void (*ipa_irq_handler_t)(struct ipa *ipa, enum ipa_irq_id irq_id); - -/** - * ipa_interrupt_add() - Register a handler for an IPA interrupt type - * @interrupt: IPA interrupt structure - * @irq_id: IPA interrupt type - * @handler: Handler function for the interrupt - * - * Add a handler for an IPA interrupt and enable it. IPA interrupt - * handlers are run in threaded interrupt context, so are allowed to - * block. - */ -void ipa_interrupt_add(struct ipa_interrupt *interrupt, enum ipa_irq_id irq_id, - ipa_irq_handler_t handler); - -/** - * ipa_interrupt_remove() - Remove the handler for an IPA interrupt type - * @interrupt: IPA interrupt structure - * @irq_id: IPA interrupt type - * - * Remove an IPA interrupt handler and disable it. - */ -void ipa_interrupt_remove(struct ipa_interrupt *interrupt, - enum ipa_irq_id irq_id); +enum ipa_irq_id; /** * ipa_interrupt_suspend_enable - Enable TX_SUSPEND for an endpoint @@ -86,6 +54,20 @@ void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt); void ipa_interrupt_simulate_suspend(struct ipa_interrupt *interrupt); /** + * ipa_interrupt_enable() - Enable an IPA interrupt type + * @ipa: IPA pointer + * @ipa_irq: IPA interrupt ID + */ +void ipa_interrupt_enable(struct ipa *ipa, enum ipa_irq_id ipa_irq); + +/** + * ipa_interrupt_disable() - Disable an IPA interrupt type + * @ipa: IPA pointer + * @ipa_irq: IPA interrupt ID + */ +void ipa_interrupt_disable(struct ipa *ipa, enum ipa_irq_id ipa_irq); + +/** * ipa_interrupt_config() - Configure the IPA interrupt framework * @ipa: IPA pointer * diff --git a/drivers/net/ipa/ipa_power.c b/drivers/net/ipa/ipa_power.c index 8420f93128a2..a282512ebd2d 100644 --- a/drivers/net/ipa/ipa_power.c +++ b/drivers/net/ipa/ipa_power.c @@ -202,17 +202,7 @@ u32 ipa_core_clock_rate(struct ipa *ipa) return ipa->power ? (u32)clk_get_rate(ipa->power->core) : 0; } -/** - * ipa_suspend_handler() - Handle the suspend IPA interrupt - * @ipa: IPA pointer - * @irq_id: IPA interrupt type (unused) - * - * If an RX endpoint is suspended, and the IPA has a packet destined for - * that endpoint, the IPA generates a SUSPEND interrupt to inform the AP - * that it should resume the endpoint. If we get one of these interrupts - * we just wake up the system. - */ -static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id) +void ipa_power_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id) { /* To handle an IPA interrupt we will have resumed the hardware * just to handle the interrupt, so we're done. If we are in a @@ -335,12 +325,11 @@ int ipa_power_setup(struct ipa *ipa) { int ret; - ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND, - ipa_suspend_handler); + ipa_interrupt_enable(ipa, IPA_IRQ_TX_SUSPEND); ret = device_init_wakeup(&ipa->pdev->dev, true); if (ret) - ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND); + ipa_interrupt_disable(ipa, IPA_IRQ_TX_SUSPEND); return ret; } @@ -348,7 +337,7 @@ int ipa_power_setup(struct ipa *ipa) void ipa_power_teardown(struct ipa *ipa) { (void)device_init_wakeup(&ipa->pdev->dev, false); - ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND); + ipa_interrupt_disable(ipa, IPA_IRQ_TX_SUSPEND); } /* Initialize IPA power management */ diff --git a/drivers/net/ipa/ipa_power.h b/drivers/net/ipa/ipa_power.h index 896f052e51a1..3a4c59ea1222 100644 --- a/drivers/net/ipa/ipa_power.h +++ b/drivers/net/ipa/ipa_power.h @@ -10,6 +10,7 @@ struct device; struct ipa; struct ipa_power_data; +enum ipa_irq_id; /* IPA device power management function block */ extern const struct dev_pm_ops ipa_pm_ops; @@ -48,6 +49,17 @@ void ipa_power_modem_queue_active(struct ipa *ipa); void ipa_power_retention(struct ipa *ipa, bool enable); /** + * ipa_power_suspend_handler() - Handler for SUSPEND IPA interrupts + * @ipa: IPA pointer + * @irq_id: IPA interrupt ID (unused) + * + * If an RX endpoint is suspended, and the IPA has a packet destined for + * that endpoint, the IPA generates a SUSPEND interrupt to inform the AP + * that it should resume the endpoint. + */ +void ipa_power_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id); + +/** * ipa_power_setup() - Set up IPA power management * @ipa: IPA pointer * diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c index f0ee47281015..cb8a76a75f21 100644 --- a/drivers/net/ipa/ipa_uc.c +++ b/drivers/net/ipa/ipa_uc.c @@ -124,7 +124,7 @@ static struct ipa_uc_mem_area *ipa_uc_shared(struct ipa *ipa) } /* Microcontroller event IPA interrupt handler */ -static void ipa_uc_event_handler(struct ipa *ipa, enum ipa_irq_id irq_id) +static void ipa_uc_event_handler(struct ipa *ipa) { struct ipa_uc_mem_area *shared = ipa_uc_shared(ipa); struct device *dev = &ipa->pdev->dev; @@ -138,7 +138,7 @@ static void ipa_uc_event_handler(struct ipa *ipa, enum ipa_irq_id irq_id) } /* Microcontroller response IPA interrupt handler */ -static void ipa_uc_response_hdlr(struct ipa *ipa, enum ipa_irq_id irq_id) +static void ipa_uc_response_hdlr(struct ipa *ipa) { struct ipa_uc_mem_area *shared = ipa_uc_shared(ipa); struct device *dev = &ipa->pdev->dev; @@ -170,13 +170,22 @@ static void ipa_uc_response_hdlr(struct ipa *ipa, enum ipa_irq_id irq_id) } } +void ipa_uc_interrupt_handler(struct ipa *ipa, enum ipa_irq_id irq_id) +{ + /* Silently ignore anything unrecognized */ + if (irq_id == IPA_IRQ_UC_0) + ipa_uc_event_handler(ipa); + else if (irq_id == IPA_IRQ_UC_1) + ipa_uc_response_hdlr(ipa); +} + /* Configure the IPA microcontroller subsystem */ void ipa_uc_config(struct ipa *ipa) { ipa->uc_powered = false; ipa->uc_loaded = false; - ipa_interrupt_add(ipa->interrupt, IPA_IRQ_UC_0, ipa_uc_event_handler); - ipa_interrupt_add(ipa->interrupt, IPA_IRQ_UC_1, ipa_uc_response_hdlr); + ipa_interrupt_enable(ipa, IPA_IRQ_UC_0); + ipa_interrupt_enable(ipa, IPA_IRQ_UC_1); } /* Inverse of ipa_uc_config() */ @@ -184,8 +193,8 @@ void ipa_uc_deconfig(struct ipa *ipa) { struct device *dev = &ipa->pdev->dev; - ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_1); - ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_0); + ipa_interrupt_disable(ipa, IPA_IRQ_UC_1); + ipa_interrupt_disable(ipa, IPA_IRQ_UC_0); if (ipa->uc_loaded) ipa_power_retention(ipa, false); diff --git a/drivers/net/ipa/ipa_uc.h b/drivers/net/ipa/ipa_uc.h index 8514096e6f36..85aa0df818c2 100644 --- a/drivers/net/ipa/ipa_uc.h +++ b/drivers/net/ipa/ipa_uc.h @@ -7,6 +7,14 @@ #define _IPA_UC_H_ struct ipa; +enum ipa_irq_id; + +/** + * ipa_uc_interrupt_handler() - Handler for microcontroller IPA interrupts + * @ipa: IPA pointer + * @irq_id: IPA interrupt ID + */ +void ipa_uc_interrupt_handler(struct ipa *ipa, enum ipa_irq_id irq_id); /** * ipa_uc_config() - Configure the IPA microcontroller subsystem diff --git a/drivers/net/mdio/mdio-bitbang.c b/drivers/net/mdio/mdio-bitbang.c index 07609114a26b..b83932562be2 100644 --- a/drivers/net/mdio/mdio-bitbang.c +++ b/drivers/net/mdio/mdio-bitbang.c @@ -127,14 +127,12 @@ static void mdiobb_cmd(struct mdiobb_ctrl *ctrl, int op, u8 phy, u8 reg) /* In clause 45 mode all commands are prefixed by MDIO_ADDR to specify the lower 16 bits of the 21 bit address. This transfer is done identically to a - MDIO_WRITE except for a different code. To enable clause 45 mode or - MII_ADDR_C45 into the address. Theoretically clause 45 and normal devices - can exist on the same bus. Normal devices should ignore the MDIO_ADDR + MDIO_WRITE except for a different code. Theoretically clause 45 and normal + devices can exist on the same bus. Normal devices should ignore the MDIO_ADDR phase. */ -static int mdiobb_cmd_addr(struct mdiobb_ctrl *ctrl, int phy, u32 addr) +static void mdiobb_cmd_addr(struct mdiobb_ctrl *ctrl, int phy, int dev_addr, + int reg) { - unsigned int dev_addr = (addr >> 16) & 0x1F; - unsigned int reg = addr & 0xFFFF; mdiobb_cmd(ctrl, MDIO_C45_ADDR, phy, dev_addr); /* send the turnaround (10) */ @@ -145,21 +143,13 @@ static int mdiobb_cmd_addr(struct mdiobb_ctrl *ctrl, int phy, u32 addr) ctrl->ops->set_mdio_dir(ctrl, 0); mdiobb_get_bit(ctrl); - - return dev_addr; } -int mdiobb_read(struct mii_bus *bus, int phy, int reg) +static int mdiobb_read_common(struct mii_bus *bus, int phy) { struct mdiobb_ctrl *ctrl = bus->priv; int ret, i; - if (reg & MII_ADDR_C45) { - reg = mdiobb_cmd_addr(ctrl, phy, reg); - mdiobb_cmd(ctrl, MDIO_C45_READ, phy, reg); - } else - mdiobb_cmd(ctrl, ctrl->op_c22_read, phy, reg); - ctrl->ops->set_mdio_dir(ctrl, 0); /* check the turnaround bit: the PHY should be driving it to zero, if this @@ -180,17 +170,31 @@ int mdiobb_read(struct mii_bus *bus, int phy, int reg) mdiobb_get_bit(ctrl); return ret; } -EXPORT_SYMBOL(mdiobb_read); -int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val) +int mdiobb_read_c22(struct mii_bus *bus, int phy, int reg) { struct mdiobb_ctrl *ctrl = bus->priv; - if (reg & MII_ADDR_C45) { - reg = mdiobb_cmd_addr(ctrl, phy, reg); - mdiobb_cmd(ctrl, MDIO_C45_WRITE, phy, reg); - } else - mdiobb_cmd(ctrl, ctrl->op_c22_write, phy, reg); + mdiobb_cmd(ctrl, ctrl->op_c22_read, phy, reg); + + return mdiobb_read_common(bus, phy); +} +EXPORT_SYMBOL(mdiobb_read_c22); + +int mdiobb_read_c45(struct mii_bus *bus, int phy, int devad, int reg) +{ + struct mdiobb_ctrl *ctrl = bus->priv; + + mdiobb_cmd_addr(ctrl, phy, devad, reg); + mdiobb_cmd(ctrl, MDIO_C45_READ, phy, reg); + + return mdiobb_read_common(bus, phy); +} +EXPORT_SYMBOL(mdiobb_read_c45); + +static int mdiobb_write_common(struct mii_bus *bus, u16 val) +{ + struct mdiobb_ctrl *ctrl = bus->priv; /* send the turnaround (10) */ mdiobb_send_bit(ctrl, 1); @@ -202,7 +206,27 @@ int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val) mdiobb_get_bit(ctrl); return 0; } -EXPORT_SYMBOL(mdiobb_write); + +int mdiobb_write_c22(struct mii_bus *bus, int phy, int reg, u16 val) +{ + struct mdiobb_ctrl *ctrl = bus->priv; + + mdiobb_cmd(ctrl, ctrl->op_c22_write, phy, reg); + + return mdiobb_write_common(bus, val); +} +EXPORT_SYMBOL(mdiobb_write_c22); + +int mdiobb_write_c45(struct mii_bus *bus, int phy, int devad, int reg, u16 val) +{ + struct mdiobb_ctrl *ctrl = bus->priv; + + mdiobb_cmd_addr(ctrl, phy, devad, reg); + mdiobb_cmd(ctrl, MDIO_C45_WRITE, phy, reg); + + return mdiobb_write_common(bus, val); +} +EXPORT_SYMBOL(mdiobb_write_c45); struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl) { @@ -214,8 +238,11 @@ struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl) __module_get(ctrl->ops->owner); - bus->read = mdiobb_read; - bus->write = mdiobb_write; + bus->read = mdiobb_read_c22; + bus->write = mdiobb_write_c22; + bus->read_c45 = mdiobb_read_c45; + bus->write_c45 = mdiobb_write_c45; + bus->priv = ctrl; if (!ctrl->override_op_c22) { ctrl->op_c22_read = MDIO_READ; diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index b962fc8e1397..738784fda117 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1556,14 +1556,18 @@ int nsim_drv_probe(struct nsim_bus_dev *nsim_bus_dev) goto err_devlink_unlock; } - err = nsim_dev_resources_register(devlink); + err = devl_register(devlink); if (err) goto err_vfc_free; + err = nsim_dev_resources_register(devlink); + if (err) + goto err_dl_unregister; + err = devlink_params_register(devlink, nsim_devlink_params, ARRAY_SIZE(nsim_devlink_params)); if (err) - goto err_dl_unregister; + goto err_resource_unregister; nsim_devlink_set_params_init_values(nsim_dev, devlink); err = nsim_dev_dummy_region_init(nsim_dev, devlink); @@ -1607,7 +1611,6 @@ int nsim_drv_probe(struct nsim_bus_dev *nsim_bus_dev) nsim_dev->esw_mode = DEVLINK_ESWITCH_MODE_LEGACY; devlink_set_features(devlink, DEVLINK_F_RELOAD); devl_unlock(devlink); - devlink_register(devlink); return 0; err_hwstats_exit: @@ -1629,8 +1632,10 @@ err_dummy_region_exit: err_params_unregister: devlink_params_unregister(devlink, nsim_devlink_params, ARRAY_SIZE(nsim_devlink_params)); -err_dl_unregister: +err_resource_unregister: devl_resources_unregister(devlink); +err_dl_unregister: + devl_unregister(devlink); err_vfc_free: kfree(nsim_dev->vfconfigs); err_devlink_unlock: @@ -1668,7 +1673,6 @@ void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev) struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); struct devlink *devlink = priv_to_devlink(nsim_dev); - devlink_unregister(devlink); devl_lock(devlink); nsim_dev_reload_destroy(nsim_dev); @@ -1677,6 +1681,7 @@ void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev) devlink_params_unregister(devlink, nsim_devlink_params, ARRAY_SIZE(nsim_devlink_params)); devl_resources_unregister(devlink); + devl_unregister(devlink); kfree(nsim_dev->vfconfigs); kfree(nsim_dev->fa_cookie); devl_unlock(devlink); diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index f6a038a1d51e..bc428a816719 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -199,9 +199,7 @@ int xpcs_write(struct dw_xpcs *xpcs, int dev, u32 reg, u16 val) static int xpcs_modify_changed(struct dw_xpcs *xpcs, int dev, u32 reg, u16 mask, u16 set) { - u32 reg_addr = mdiobus_c45_addr(dev, reg); - - return mdiodev_modify_changed(xpcs->mdiodev, reg_addr, mask, set); + return mdiodev_c45_modify_changed(xpcs->mdiodev, dev, reg, mask, set); } static int xpcs_read_vendor(struct dw_xpcs *xpcs, int dev, u32 reg) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 1327290decab..f5df2edc94a5 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -277,6 +277,13 @@ config NXP_TJA11XX_PHY help Currently supports the NXP TJA1100 and TJA1101 PHY. +config NCN26000_PHY + tristate "Onsemi 10BASE-T1S Ethernet PHY" + help + Adds support for the onsemi 10BASE-T1S Ethernet PHY. + Currently supports the NCN26000 10BASE-T1S Industrial PHY + with MII interface. + config AT803X_PHY tristate "Qualcomm Atheros AR803X PHYs and QCA833x PHYs" depends on REGULATOR diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index f7138d3c896b..b5138066ba04 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -77,6 +77,7 @@ obj-$(CONFIG_MICROCHIP_T1_PHY) += microchip_t1.o obj-$(CONFIG_MICROSEMI_PHY) += mscc/ obj-$(CONFIG_MOTORCOMM_PHY) += motorcomm.o obj-$(CONFIG_NATIONAL_PHY) += national.o +obj-$(CONFIG_NCN26000_PHY) += ncn26000.o obj-$(CONFIG_NXP_C45_TJA11XX_PHY) += nxp-c45-tja11xx.o obj-$(CONFIG_NXP_TJA11XX_PHY) += nxp-tja11xx.o obj-$(CONFIG_QSEMI_PHY) += qsemi.o diff --git a/drivers/net/phy/mdio-open-alliance.h b/drivers/net/phy/mdio-open-alliance.h new file mode 100644 index 000000000000..931e14660d75 --- /dev/null +++ b/drivers/net/phy/mdio-open-alliance.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * mdio-open-alliance.h - definition of OPEN Alliance SIG standard registers + */ + +#ifndef __MDIO_OPEN_ALLIANCE__ +#define __MDIO_OPEN_ALLIANCE__ + +#include <linux/mdio.h> + +/* NOTE: all OATC14 registers are located in MDIO_MMD_VEND2 */ + +/* Open Alliance TC14 (10BASE-T1S) registers */ +#define MDIO_OATC14_PLCA_IDVER 0xca00 /* PLCA ID and version */ +#define MDIO_OATC14_PLCA_CTRL0 0xca01 /* PLCA Control register 0 */ +#define MDIO_OATC14_PLCA_CTRL1 0xca02 /* PLCA Control register 1 */ +#define MDIO_OATC14_PLCA_STATUS 0xca03 /* PLCA Status register */ +#define MDIO_OATC14_PLCA_TOTMR 0xca04 /* PLCA TO Timer register */ +#define MDIO_OATC14_PLCA_BURST 0xca05 /* PLCA BURST mode register */ + +/* Open Alliance TC14 PLCA IDVER register */ +#define MDIO_OATC14_PLCA_IDM 0xff00 /* PLCA MAP ID */ +#define MDIO_OATC14_PLCA_VER 0x00ff /* PLCA MAP version */ + +/* Open Alliance TC14 PLCA CTRL0 register */ +#define MDIO_OATC14_PLCA_EN BIT(15) /* PLCA enable */ +#define MDIO_OATC14_PLCA_RST BIT(14) /* PLCA reset */ + +/* Open Alliance TC14 PLCA CTRL1 register */ +#define MDIO_OATC14_PLCA_NCNT 0xff00 /* PLCA node count */ +#define MDIO_OATC14_PLCA_ID 0x00ff /* PLCA local node ID */ + +/* Open Alliance TC14 PLCA STATUS register */ +#define MDIO_OATC14_PLCA_PST BIT(15) /* PLCA status indication */ + +/* Open Alliance TC14 PLCA TOTMR register */ +#define MDIO_OATC14_PLCA_TOT 0x00ff + +/* Open Alliance TC14 PLCA BURST register */ +#define MDIO_OATC14_PLCA_MAXBC 0xff00 +#define MDIO_OATC14_PLCA_BTMR 0x00ff + +/* Version Identifiers */ +#define OATC14_IDM 0x0a00 + +#endif /* __MDIO_OPEN_ALLIANCE__ */ diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 1cd604cd1fa1..902e1c88ef58 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -526,8 +526,15 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) int i, err; struct gpio_desc *gpiod; - if (NULL == bus || NULL == bus->name || - NULL == bus->read || NULL == bus->write) + if (!bus || !bus->name) + return -EINVAL; + + /* An access method always needs both read and write operations */ + if (!!bus->read != !!bus->write || !!bus->read_c45 != !!bus->write_c45) + return -EINVAL; + + /* At least one method is mandatory */ + if (!bus->read && !bus->read_c45) return -EINVAL; if (bus->parent && bus->parent->of_node) @@ -759,7 +766,10 @@ int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum) lockdep_assert_held_once(&bus->mdio_lock); - retval = bus->read(bus, addr, regnum); + if (bus->read) + retval = bus->read(bus, addr, regnum); + else + retval = -EOPNOTSUPP; trace_mdio_access(bus, 1, addr, regnum, retval, retval); mdiobus_stats_acct(&bus->stats[addr], true, retval); @@ -785,7 +795,10 @@ int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val) lockdep_assert_held_once(&bus->mdio_lock); - err = bus->write(bus, addr, regnum, val); + if (bus->write) + err = bus->write(bus, addr, regnum, val); + else + err = -EOPNOTSUPP; trace_mdio_access(bus, 0, addr, regnum, val, err); mdiobus_stats_acct(&bus->stats[addr], false, err); @@ -826,6 +839,105 @@ int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, } EXPORT_SYMBOL_GPL(__mdiobus_modify_changed); +static u32 mdiobus_c45_addr(int devad, u16 regnum) +{ + return MII_ADDR_C45 | devad << MII_DEVADDR_C45_SHIFT | regnum; +} + +/** + * __mdiobus_c45_read - Unlocked version of the mdiobus_c45_read function + * @bus: the mii_bus struct + * @addr: the phy address + * @devad: device address to read + * @regnum: register number to read + * + * Read a MDIO bus register. Caller must hold the mdio bus lock. + * + * NOTE: MUST NOT be called from interrupt context. + */ +int __mdiobus_c45_read(struct mii_bus *bus, int addr, int devad, u32 regnum) +{ + int retval; + + lockdep_assert_held_once(&bus->mdio_lock); + + if (bus->read_c45) + retval = bus->read_c45(bus, addr, devad, regnum); + else + retval = bus->read(bus, addr, mdiobus_c45_addr(devad, regnum)); + + trace_mdio_access(bus, 1, addr, regnum, retval, retval); + mdiobus_stats_acct(&bus->stats[addr], true, retval); + + return retval; +} +EXPORT_SYMBOL(__mdiobus_c45_read); + +/** + * __mdiobus_c45_write - Unlocked version of the mdiobus_write function + * @bus: the mii_bus struct + * @addr: the phy address + * @devad: device address to read + * @regnum: register number to write + * @val: value to write to @regnum + * + * Write a MDIO bus register. Caller must hold the mdio bus lock. + * + * NOTE: MUST NOT be called from interrupt context. + */ +int __mdiobus_c45_write(struct mii_bus *bus, int addr, int devad, u32 regnum, + u16 val) +{ + int err; + + lockdep_assert_held_once(&bus->mdio_lock); + + if (bus->write_c45) + err = bus->write_c45(bus, addr, devad, regnum, val); + else + err = bus->write(bus, addr, mdiobus_c45_addr(devad, regnum), + val); + + trace_mdio_access(bus, 0, addr, regnum, val, err); + mdiobus_stats_acct(&bus->stats[addr], false, err); + + return err; +} +EXPORT_SYMBOL(__mdiobus_c45_write); + +/** + * __mdiobus_c45_modify_changed - Unlocked version of the mdiobus_modify function + * @bus: the mii_bus struct + * @addr: the phy address + * @devad: device address to read + * @regnum: register number to modify + * @mask: bit mask of bits to clear + * @set: bit mask of bits to set + * + * Read, modify, and if any change, write the register value back to the + * device. Any error returns a negative number. + * + * NOTE: MUST NOT be called from interrupt context. + */ +static int __mdiobus_c45_modify_changed(struct mii_bus *bus, int addr, + int devad, u32 regnum, u16 mask, + u16 set) +{ + int new, ret; + + ret = __mdiobus_c45_read(bus, addr, devad, regnum); + if (ret < 0) + return ret; + + new = (ret & ~mask) | set; + if (new == ret) + return 0; + + ret = __mdiobus_c45_write(bus, addr, devad, regnum, new); + + return ret < 0 ? ret : 1; +} + /** * mdiobus_read_nested - Nested version of the mdiobus_read function * @bus: the mii_bus struct @@ -874,6 +986,56 @@ int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum) EXPORT_SYMBOL(mdiobus_read); /** + * mdiobus_c45_read - Convenience function for reading a given MII mgmt register + * @bus: the mii_bus struct + * @addr: the phy address + * @devad: device address to read + * @regnum: register number to read + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +int mdiobus_c45_read(struct mii_bus *bus, int addr, int devad, u32 regnum) +{ + int retval; + + mutex_lock(&bus->mdio_lock); + retval = __mdiobus_c45_read(bus, addr, devad, regnum); + mutex_unlock(&bus->mdio_lock); + + return retval; +} +EXPORT_SYMBOL(mdiobus_c45_read); + +/** + * mdiobus_c45_read_nested - Nested version of the mdiobus_c45_read function + * @bus: the mii_bus struct + * @addr: the phy address + * @devad: device address to read + * @regnum: register number to read + * + * In case of nested MDIO bus access avoid lockdep false positives by + * using mutex_lock_nested(). + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +int mdiobus_c45_read_nested(struct mii_bus *bus, int addr, int devad, + u32 regnum) +{ + int retval; + + mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); + retval = __mdiobus_c45_read(bus, addr, devad, regnum); + mutex_unlock(&bus->mdio_lock); + + return retval; +} +EXPORT_SYMBOL(mdiobus_c45_read_nested); + +/** * mdiobus_write_nested - Nested version of the mdiobus_write function * @bus: the mii_bus struct * @addr: the phy address @@ -923,6 +1085,59 @@ int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val) EXPORT_SYMBOL(mdiobus_write); /** + * mdiobus_c45_write - Convenience function for writing a given MII mgmt register + * @bus: the mii_bus struct + * @addr: the phy address + * @devad: device address to read + * @regnum: register number to write + * @val: value to write to @regnum + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +int mdiobus_c45_write(struct mii_bus *bus, int addr, int devad, u32 regnum, + u16 val) +{ + int err; + + mutex_lock(&bus->mdio_lock); + err = __mdiobus_c45_write(bus, addr, devad, regnum, val); + mutex_unlock(&bus->mdio_lock); + + return err; +} +EXPORT_SYMBOL(mdiobus_c45_write); + +/** + * mdiobus_c45_write_nested - Nested version of the mdiobus_c45_write function + * @bus: the mii_bus struct + * @addr: the phy address + * @devad: device address to read + * @regnum: register number to write + * @val: value to write to @regnum + * + * In case of nested MDIO bus access avoid lockdep false positives by + * using mutex_lock_nested(). + * + * NOTE: MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. + */ +int mdiobus_c45_write_nested(struct mii_bus *bus, int addr, int devad, + u32 regnum, u16 val) +{ + int err; + + mutex_lock_nested(&bus->mdio_lock, MDIO_MUTEX_NESTED); + err = __mdiobus_c45_write(bus, addr, devad, regnum, val); + mutex_unlock(&bus->mdio_lock); + + return err; +} +EXPORT_SYMBOL(mdiobus_c45_write_nested); + +/** * mdiobus_modify - Convenience function for modifying a given mdio device * register * @bus: the mii_bus struct @@ -944,6 +1159,30 @@ int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask, u16 set) EXPORT_SYMBOL_GPL(mdiobus_modify); /** + * mdiobus_c45_modify - Convenience function for modifying a given mdio device + * register + * @bus: the mii_bus struct + * @addr: the phy address + * @devad: device address to read + * @regnum: register number to write + * @mask: bit mask of bits to clear + * @set: bit mask of bits to set + */ +int mdiobus_c45_modify(struct mii_bus *bus, int addr, int devad, u32 regnum, + u16 mask, u16 set) +{ + int err; + + mutex_lock(&bus->mdio_lock); + err = __mdiobus_c45_modify_changed(bus, addr, devad, regnum, + mask, set); + mutex_unlock(&bus->mdio_lock); + + return err < 0 ? err : 0; +} +EXPORT_SYMBOL_GPL(mdiobus_c45_modify); + +/** * mdiobus_modify_changed - Convenience function for modifying a given mdio * device register and returning if it changed * @bus: the mii_bus struct @@ -966,6 +1205,29 @@ int mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, EXPORT_SYMBOL_GPL(mdiobus_modify_changed); /** + * mdiobus_c45_modify_changed - Convenience function for modifying a given mdio + * device register and returning if it changed + * @bus: the mii_bus struct + * @addr: the phy address + * @devad: device address to read + * @regnum: register number to write + * @mask: bit mask of bits to clear + * @set: bit mask of bits to set + */ +int mdiobus_c45_modify_changed(struct mii_bus *bus, int devad, int addr, + u32 regnum, u16 mask, u16 set) +{ + int err; + + mutex_lock(&bus->mdio_lock); + err = __mdiobus_c45_modify_changed(bus, addr, devad, regnum, mask, set); + mutex_unlock(&bus->mdio_lock); + + return err; +} +EXPORT_SYMBOL_GPL(mdiobus_c45_modify_changed); + +/** * mdio_bus_match - determine if given MDIO driver supports the given * MDIO device * @dev: target MDIO device diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 26ce0c5defcd..d5b80c31ab91 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2088,7 +2088,8 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev, const struct kszphy_type *type = phydev->drv->driver_data; unsigned long pair_mask = type->pair_mask; int retries = 20; - int pair, ret; + int ret = 0; + int pair; *finished = false; @@ -2794,13 +2795,11 @@ static void lan8814_get_rx_ts(struct kszphy_ptp_priv *ptp_priv) } while (PTP_CAP_INFO_RX_TS_CNT_GET_(reg) > 0); } -static void lan8814_handle_ptp_interrupt(struct phy_device *phydev) +static void lan8814_handle_ptp_interrupt(struct phy_device *phydev, u16 status) { struct kszphy_priv *priv = phydev->priv; struct kszphy_ptp_priv *ptp_priv = &priv->ptp_priv; - u16 status; - status = lanphy_read_page_reg(phydev, 5, PTP_TSU_INT_STS); if (status & PTP_TSU_INT_STS_PTP_TX_TS_EN_) lan8814_get_tx_ts(ptp_priv); @@ -2899,8 +2898,8 @@ static int lan8804_config_intr(struct phy_device *phydev) static irqreturn_t lan8814_handle_interrupt(struct phy_device *phydev) { - int irq_status, tsu_irq_status; int ret = IRQ_NONE; + int irq_status; irq_status = phy_read(phydev, LAN8814_INTS); if (irq_status < 0) { @@ -2913,20 +2912,13 @@ static irqreturn_t lan8814_handle_interrupt(struct phy_device *phydev) ret = IRQ_HANDLED; } - while (1) { - tsu_irq_status = lanphy_read_page_reg(phydev, 4, - LAN8814_INTR_STS_REG); - - if (tsu_irq_status > 0 && - (tsu_irq_status & (LAN8814_INTR_STS_REG_1588_TSU0_ | - LAN8814_INTR_STS_REG_1588_TSU1_ | - LAN8814_INTR_STS_REG_1588_TSU2_ | - LAN8814_INTR_STS_REG_1588_TSU3_))) { - lan8814_handle_ptp_interrupt(phydev); - ret = IRQ_HANDLED; - } else { + while (true) { + irq_status = lanphy_read_page_reg(phydev, 5, PTP_TSU_INT_STS); + if (!irq_status) break; - } + + lan8814_handle_ptp_interrupt(phydev, irq_status); + ret = IRQ_HANDLED; } return ret; @@ -3016,10 +3008,6 @@ static int lan8814_ptp_probe_once(struct phy_device *phydev) { struct lan8814_shared_priv *shared = phydev->shared->priv; - if (!IS_ENABLED(CONFIG_PTP_1588_CLOCK) || - !IS_ENABLED(CONFIG_NETWORK_PHY_TIMESTAMPING)) - return 0; - /* Initialise shared lock for clock*/ mutex_init(&shared->shared_lock); @@ -3039,12 +3027,16 @@ static int lan8814_ptp_probe_once(struct phy_device *phydev) shared->ptp_clock = ptp_clock_register(&shared->ptp_clock_info, &phydev->mdio.dev); - if (IS_ERR_OR_NULL(shared->ptp_clock)) { + if (IS_ERR(shared->ptp_clock)) { phydev_err(phydev, "ptp_clock_register failed %lu\n", PTR_ERR(shared->ptp_clock)); return -EINVAL; } + /* Check if PHC support is missing at the configuration level */ + if (!shared->ptp_clock) + return 0; + phydev_dbg(phydev, "successfully registered ptp clock\n"); shared->phydev = phydev; diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c index 147d7a5a9b35..e5972b4ef6e8 100644 --- a/drivers/net/phy/mxl-gpy.c +++ b/drivers/net/phy/mxl-gpy.c @@ -12,6 +12,7 @@ #include <linux/mutex.h> #include <linux/phy.h> #include <linux/polynomial.h> +#include <linux/property.h> #include <linux/netdevice.h> /* PHY ID */ @@ -292,6 +293,10 @@ static int gpy_probe(struct phy_device *phydev) phydev->priv = priv; mutex_init(&priv->mbox_lock); + if (gpy_has_broken_mdint(phydev) && + !device_property_present(dev, "maxlinear,use-broken-interrupts")) + phydev->dev_flags |= PHY_F_NO_IRQ; + fw_version = phy_read(phydev, PHY_FWV); if (fw_version < 0) return fw_version; diff --git a/drivers/net/phy/ncn26000.c b/drivers/net/phy/ncn26000.c new file mode 100644 index 000000000000..5680584f659e --- /dev/null +++ b/drivers/net/phy/ncn26000.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * Driver for the onsemi 10BASE-T1S NCN26000 PHYs family. + * + * Copyright 2022 onsemi + */ +#include <linux/kernel.h> +#include <linux/bitfield.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mii.h> +#include <linux/phy.h> + +#include "mdio-open-alliance.h" + +#define PHY_ID_NCN26000 0x180FF5A1 + +#define NCN26000_REG_IRQ_CTL 16 +#define NCN26000_REG_IRQ_STATUS 17 + +// the NCN26000 maps link_ctrl to BMCR_ANENABLE +#define NCN26000_BCMR_LINK_CTRL_BIT BMCR_ANENABLE + +// the NCN26000 maps link_status to BMSR_ANEGCOMPLETE +#define NCN26000_BMSR_LINK_STATUS_BIT BMSR_ANEGCOMPLETE + +#define NCN26000_IRQ_LINKST_BIT BIT(0) +#define NCN26000_IRQ_PLCAST_BIT BIT(1) +#define NCN26000_IRQ_LJABBER_BIT BIT(2) +#define NCN26000_IRQ_RJABBER_BIT BIT(3) +#define NCN26000_IRQ_PLCAREC_BIT BIT(4) +#define NCN26000_IRQ_PHYSCOL_BIT BIT(5) +#define NCN26000_IRQ_RESET_BIT BIT(15) + +#define TO_TMR_DEFAULT 32 + +static int ncn26000_config_init(struct phy_device *phydev) +{ + /* HW bug workaround: the default value of the PLCA TO_TIMER should be + * 32, where the current version of NCN26000 reports 24. This will be + * fixed in future PHY versions. For the time being, we force the + * correct default here. + */ + return phy_write_mmd(phydev, MDIO_MMD_VEND2, MDIO_OATC14_PLCA_TOTMR, + TO_TMR_DEFAULT); +} + +static int ncn26000_config_aneg(struct phy_device *phydev) +{ + /* Note: the NCN26000 supports only P2MP link mode. Therefore, AN is not + * supported. However, this function is invoked by phylib to enable the + * PHY, regardless of the AN support. + */ + phydev->mdix_ctrl = ETH_TP_MDI_AUTO; + phydev->mdix = ETH_TP_MDI; + + // bring up the link + return phy_write(phydev, MII_BMCR, NCN26000_BCMR_LINK_CTRL_BIT); +} + +static int ncn26000_read_status(struct phy_device *phydev) +{ + /* The NCN26000 reports NCN26000_LINK_STATUS_BIT if the link status of + * the PHY is up. It further reports the logical AND of the link status + * and the PLCA status in the BMSR_LSTATUS bit. + */ + int ret; + + /* The link state is latched low so that momentary link + * drops can be detected. Do not double-read the status + * in polling mode to detect such short link drops except + * the link was already down. + */ + if (!phy_polling_mode(phydev) || !phydev->link) { + ret = phy_read(phydev, MII_BMSR); + if (ret < 0) + return ret; + else if (ret & NCN26000_BMSR_LINK_STATUS_BIT) + goto upd_link; + } + + ret = phy_read(phydev, MII_BMSR); + if (ret < 0) + return ret; + +upd_link: + // update link status + if (ret & NCN26000_BMSR_LINK_STATUS_BIT) { + phydev->link = 1; + phydev->pause = 0; + phydev->duplex = DUPLEX_HALF; + phydev->speed = SPEED_10; + } else { + phydev->link = 0; + phydev->duplex = DUPLEX_UNKNOWN; + phydev->speed = SPEED_UNKNOWN; + } + + return 0; +} + +static irqreturn_t ncn26000_handle_interrupt(struct phy_device *phydev) +{ + int ret; + + // read and aknowledge the IRQ status register + ret = phy_read(phydev, NCN26000_REG_IRQ_STATUS); + + // check only link status changes + if (ret < 0 || (ret & NCN26000_REG_IRQ_STATUS) == 0) + return IRQ_NONE; + + phy_trigger_machine(phydev); + return IRQ_HANDLED; +} + +static int ncn26000_config_intr(struct phy_device *phydev) +{ + int ret; + u16 irqe; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + // acknowledge IRQs + ret = phy_read(phydev, NCN26000_REG_IRQ_STATUS); + if (ret < 0) + return ret; + + // get link status notifications + irqe = NCN26000_IRQ_LINKST_BIT; + } else { + // disable all IRQs + irqe = 0; + } + + ret = phy_write(phydev, NCN26000_REG_IRQ_CTL, irqe); + if (ret != 0) + return ret; + + return 0; +} + +static struct phy_driver ncn26000_driver[] = { + { + PHY_ID_MATCH_MODEL(PHY_ID_NCN26000), + .name = "NCN26000", + .features = PHY_BASIC_T1S_P2MP_FEATURES, + .config_init = ncn26000_config_init, + .config_intr = ncn26000_config_intr, + .config_aneg = ncn26000_config_aneg, + .read_status = ncn26000_read_status, + .handle_interrupt = ncn26000_handle_interrupt, + .get_plca_cfg = genphy_c45_plca_get_cfg, + .set_plca_cfg = genphy_c45_plca_set_cfg, + .get_plca_status = genphy_c45_plca_get_status, + .soft_reset = genphy_soft_reset, + }, +}; + +module_phy_driver(ncn26000_driver); + +static struct mdio_device_id __maybe_unused ncn26000_tbl[] = { + { PHY_ID_MATCH_MODEL(PHY_ID_NCN26000) }, + { } +}; + +MODULE_DEVICE_TABLE(mdio, ncn26000_tbl); + +MODULE_AUTHOR("Piergiorgio Beruto"); +MODULE_DESCRIPTION("onsemi 10BASE-T1S PHY driver"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index a87a4b3ffce4..cff83220595c 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -8,6 +8,8 @@ #include <linux/mii.h> #include <linux/phy.h> +#include "mdio-open-alliance.h" + /** * genphy_c45_baset1_able - checks if the PMA has BASE-T1 extended abilities * @phydev: target phy_device struct @@ -931,6 +933,197 @@ int genphy_c45_fast_retrain(struct phy_device *phydev, bool enable) } EXPORT_SYMBOL_GPL(genphy_c45_fast_retrain); +/** + * genphy_c45_plca_get_cfg - get PLCA configuration from standard registers + * @phydev: target phy_device struct + * @plca_cfg: output structure to store the PLCA configuration + * + * Description: if the PHY complies to the Open Alliance TC14 10BASE-T1S PLCA + * Management Registers specifications, this function can be used to retrieve + * the current PLCA configuration from the standard registers in MMD 31. + */ +int genphy_c45_plca_get_cfg(struct phy_device *phydev, + struct phy_plca_cfg *plca_cfg) +{ + int ret; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MDIO_OATC14_PLCA_IDVER); + if (ret < 0) + return ret; + + if ((ret & MDIO_OATC14_PLCA_IDM) != OATC14_IDM) + return -ENODEV; + + plca_cfg->version = ret & ~MDIO_OATC14_PLCA_IDM; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MDIO_OATC14_PLCA_CTRL0); + if (ret < 0) + return ret; + + plca_cfg->enabled = !!(ret & MDIO_OATC14_PLCA_EN); + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MDIO_OATC14_PLCA_CTRL1); + if (ret < 0) + return ret; + + plca_cfg->node_cnt = (ret & MDIO_OATC14_PLCA_NCNT) >> 8; + plca_cfg->node_id = (ret & MDIO_OATC14_PLCA_ID); + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MDIO_OATC14_PLCA_TOTMR); + if (ret < 0) + return ret; + + plca_cfg->to_tmr = ret & MDIO_OATC14_PLCA_TOT; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MDIO_OATC14_PLCA_BURST); + if (ret < 0) + return ret; + + plca_cfg->burst_cnt = (ret & MDIO_OATC14_PLCA_MAXBC) >> 8; + plca_cfg->burst_tmr = (ret & MDIO_OATC14_PLCA_BTMR); + + return 0; +} +EXPORT_SYMBOL_GPL(genphy_c45_plca_get_cfg); + +/** + * genphy_c45_plca_set_cfg - set PLCA configuration using standard registers + * @phydev: target phy_device struct + * @plca_cfg: structure containing the PLCA configuration. Fields set to -1 are + * not to be changed. + * + * Description: if the PHY complies to the Open Alliance TC14 10BASE-T1S PLCA + * Management Registers specifications, this function can be used to modify + * the PLCA configuration using the standard registers in MMD 31. + */ +int genphy_c45_plca_set_cfg(struct phy_device *phydev, + const struct phy_plca_cfg *plca_cfg) +{ + int ret; + u16 val; + + // PLCA IDVER is read-only + if (plca_cfg->version >= 0) + return -EINVAL; + + // first of all, disable PLCA if required + if (plca_cfg->enabled == 0) { + ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, + MDIO_OATC14_PLCA_CTRL0, + MDIO_OATC14_PLCA_EN); + + if (ret < 0) + return ret; + } + + // check if we need to set the PLCA node count, node ID, or both + if (plca_cfg->node_cnt >= 0 || plca_cfg->node_id >= 0) { + /* if one between node count and node ID is -not- to be + * changed, read the register to later perform merge/purge of + * the configuration as appropriate + */ + if (plca_cfg->node_cnt < 0 || plca_cfg->node_id < 0) { + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, + MDIO_OATC14_PLCA_CTRL1); + + if (ret < 0) + return ret; + + val = ret; + } + + if (plca_cfg->node_cnt >= 0) + val = (val & ~MDIO_OATC14_PLCA_NCNT) | + (plca_cfg->node_cnt << 8); + + if (plca_cfg->node_id >= 0) + val = (val & ~MDIO_OATC14_PLCA_ID) | + (plca_cfg->node_id); + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + MDIO_OATC14_PLCA_CTRL1, val); + + if (ret < 0) + return ret; + } + + if (plca_cfg->to_tmr >= 0) { + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + MDIO_OATC14_PLCA_TOTMR, + plca_cfg->to_tmr); + + if (ret < 0) + return ret; + } + + // check if we need to set the PLCA burst count, burst timer, or both + if (plca_cfg->burst_cnt >= 0 || plca_cfg->burst_tmr >= 0) { + /* if one between burst count and burst timer is -not- to be + * changed, read the register to later perform merge/purge of + * the configuration as appropriate + */ + if (plca_cfg->burst_cnt < 0 || plca_cfg->burst_tmr < 0) { + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, + MDIO_OATC14_PLCA_BURST); + + if (ret < 0) + return ret; + + val = ret; + } + + if (plca_cfg->burst_cnt >= 0) + val = (val & ~MDIO_OATC14_PLCA_MAXBC) | + (plca_cfg->burst_cnt << 8); + + if (plca_cfg->burst_tmr >= 0) + val = (val & ~MDIO_OATC14_PLCA_BTMR) | + (plca_cfg->burst_tmr); + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, + MDIO_OATC14_PLCA_BURST, val); + + if (ret < 0) + return ret; + } + + // if we need to enable PLCA, do it at the end + if (plca_cfg->enabled > 0) { + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, + MDIO_OATC14_PLCA_CTRL0, + MDIO_OATC14_PLCA_EN); + + if (ret < 0) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(genphy_c45_plca_set_cfg); + +/** + * genphy_c45_plca_get_status - get PLCA status from standard registers + * @phydev: target phy_device struct + * @plca_st: output structure to store the PLCA status + * + * Description: if the PHY complies to the Open Alliance TC14 10BASE-T1S PLCA + * Management Registers specifications, this function can be used to retrieve + * the current PLCA status information from the standard registers in MMD 31. + */ +int genphy_c45_plca_get_status(struct phy_device *phydev, + struct phy_plca_status *plca_st) +{ + int ret; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MDIO_OATC14_PLCA_STATUS); + if (ret < 0) + return ret; + + plca_st->pst = !!(ret & MDIO_OATC14_PLCA_PST); + return 0; +} +EXPORT_SYMBOL_GPL(genphy_c45_plca_get_status); + struct phy_driver genphy_c45_driver = { .phy_id = 0xffffffff, .phy_id_mask = 0xffffffff, diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 5d08c627a516..a64186dc53f8 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -13,7 +13,7 @@ */ const char *phy_speed_to_str(int speed) { - BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 99, + BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 102, "Enum ethtool_link_mode_bit_indices and phylib are out of sync. " "If a speed or mode has been added please update phy_speed_to_str " "and the PHY settings array.\n"); @@ -260,6 +260,9 @@ static const struct phy_setting settings[] = { PHY_SETTING( 10, FULL, 10baseT_Full ), PHY_SETTING( 10, HALF, 10baseT_Half ), PHY_SETTING( 10, FULL, 10baseT1L_Full ), + PHY_SETTING( 10, FULL, 10baseT1S_Full ), + PHY_SETTING( 10, HALF, 10baseT1S_Half ), + PHY_SETTING( 10, HALF, 10baseT1S_P2MP_Half ), }; #undef PHY_SETTING diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index e5b6cb1a77f9..3378ca4f49b6 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -544,6 +544,198 @@ int phy_ethtool_get_stats(struct phy_device *phydev, EXPORT_SYMBOL(phy_ethtool_get_stats); /** + * phy_ethtool_get_plca_cfg - Get PLCA RS configuration + * @phydev: the phy_device struct + * @plca_cfg: where to store the retrieved configuration + * + * Retrieve the PLCA configuration from the PHY. Return 0 on success or a + * negative value if an error occurred. + */ +int phy_ethtool_get_plca_cfg(struct phy_device *phydev, + struct phy_plca_cfg *plca_cfg) +{ + int ret; + + if (!phydev->drv) { + ret = -EIO; + goto out; + } + + if (!phydev->drv->get_plca_cfg) { + ret = -EOPNOTSUPP; + goto out; + } + + mutex_lock(&phydev->lock); + ret = phydev->drv->get_plca_cfg(phydev, plca_cfg); + + mutex_unlock(&phydev->lock); +out: + return ret; +} + +/** + * plca_check_valid - Check PLCA configuration before enabling + * @phydev: the phy_device struct + * @plca_cfg: current PLCA configuration + * @extack: extack for reporting useful error messages + * + * Checks whether the PLCA and PHY configuration are consistent and it is safe + * to enable PLCA. Returns 0 on success or a negative value if the PLCA or PHY + * configuration is not consistent. + */ +static int plca_check_valid(struct phy_device *phydev, + const struct phy_plca_cfg *plca_cfg, + struct netlink_ext_ack *extack) +{ + int ret = 0; + + if (!linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT, + phydev->advertising)) { + ret = -EOPNOTSUPP; + NL_SET_ERR_MSG(extack, + "Point to Multi-Point mode is not enabled"); + } else if (plca_cfg->node_id >= 255) { + NL_SET_ERR_MSG(extack, "PLCA node ID is not set"); + ret = -EINVAL; + } + + return ret; +} + +/** + * phy_ethtool_set_plca_cfg - Set PLCA RS configuration + * @phydev: the phy_device struct + * @plca_cfg: new PLCA configuration to apply + * @extack: extack for reporting useful error messages + * + * Sets the PLCA configuration in the PHY. Return 0 on success or a + * negative value if an error occurred. + */ +int phy_ethtool_set_plca_cfg(struct phy_device *phydev, + const struct phy_plca_cfg *plca_cfg, + struct netlink_ext_ack *extack) +{ + struct phy_plca_cfg *curr_plca_cfg; + int ret; + + if (!phydev->drv) { + ret = -EIO; + goto out; + } + + if (!phydev->drv->set_plca_cfg || + !phydev->drv->get_plca_cfg) { + ret = -EOPNOTSUPP; + goto out; + } + + curr_plca_cfg = kmalloc(sizeof(*curr_plca_cfg), GFP_KERNEL); + if (!curr_plca_cfg) { + ret = -ENOMEM; + goto out; + } + + mutex_lock(&phydev->lock); + + ret = phydev->drv->get_plca_cfg(phydev, curr_plca_cfg); + if (ret) + goto out_drv; + + if (curr_plca_cfg->enabled < 0 && plca_cfg->enabled >= 0) { + NL_SET_ERR_MSG(extack, + "PHY does not support changing the PLCA 'enable' attribute"); + ret = -EINVAL; + goto out_drv; + } + + if (curr_plca_cfg->node_id < 0 && plca_cfg->node_id >= 0) { + NL_SET_ERR_MSG(extack, + "PHY does not support changing the PLCA 'local node ID' attribute"); + ret = -EINVAL; + goto out_drv; + } + + if (curr_plca_cfg->node_cnt < 0 && plca_cfg->node_cnt >= 0) { + NL_SET_ERR_MSG(extack, + "PHY does not support changing the PLCA 'node count' attribute"); + ret = -EINVAL; + goto out_drv; + } + + if (curr_plca_cfg->to_tmr < 0 && plca_cfg->to_tmr >= 0) { + NL_SET_ERR_MSG(extack, + "PHY does not support changing the PLCA 'TO timer' attribute"); + ret = -EINVAL; + goto out_drv; + } + + if (curr_plca_cfg->burst_cnt < 0 && plca_cfg->burst_cnt >= 0) { + NL_SET_ERR_MSG(extack, + "PHY does not support changing the PLCA 'burst count' attribute"); + ret = -EINVAL; + goto out_drv; + } + + if (curr_plca_cfg->burst_tmr < 0 && plca_cfg->burst_tmr >= 0) { + NL_SET_ERR_MSG(extack, + "PHY does not support changing the PLCA 'burst timer' attribute"); + ret = -EINVAL; + goto out_drv; + } + + // if enabling PLCA, perform a few sanity checks + if (plca_cfg->enabled > 0) { + // allow setting node_id concurrently with enabled + if (plca_cfg->node_id >= 0) + curr_plca_cfg->node_id = plca_cfg->node_id; + + ret = plca_check_valid(phydev, curr_plca_cfg, extack); + if (ret) + goto out_drv; + } + + ret = phydev->drv->set_plca_cfg(phydev, plca_cfg); + +out_drv: + kfree(curr_plca_cfg); + mutex_unlock(&phydev->lock); +out: + return ret; +} + +/** + * phy_ethtool_get_plca_status - Get PLCA RS status information + * @phydev: the phy_device struct + * @plca_st: where to store the retrieved status information + * + * Retrieve the PLCA status information from the PHY. Return 0 on success or a + * negative value if an error occurred. + */ +int phy_ethtool_get_plca_status(struct phy_device *phydev, + struct phy_plca_status *plca_st) +{ + int ret; + + if (!phydev->drv) { + ret = -EIO; + goto out; + } + + if (!phydev->drv->get_plca_status) { + ret = -EOPNOTSUPP; + goto out; + } + + mutex_lock(&phydev->lock); + ret = phydev->drv->get_plca_status(phydev, plca_st); + + mutex_unlock(&phydev->lock); +out: + return ret; +} + +/** * phy_start_cable_test - Start a cable test * * @phydev: the phy_device struct diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 716870a4499c..0d371a0a49f2 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -45,6 +45,9 @@ EXPORT_SYMBOL_GPL(phy_basic_features); __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_basic_t1_features); +__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1s_p2mp_features) __ro_after_init; +EXPORT_SYMBOL_GPL(phy_basic_t1s_p2mp_features); + __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_gbit_features); @@ -98,6 +101,12 @@ const int phy_basic_t1_features_array[3] = { }; EXPORT_SYMBOL_GPL(phy_basic_t1_features_array); +const int phy_basic_t1s_p2mp_features_array[2] = { + ETHTOOL_LINK_MODE_TP_BIT, + ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT, +}; +EXPORT_SYMBOL_GPL(phy_basic_t1s_p2mp_features_array); + const int phy_gbit_features_array[2] = { ETHTOOL_LINK_MODE_1000baseT_Half_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT, @@ -138,6 +147,11 @@ static void features_init(void) ARRAY_SIZE(phy_basic_t1_features_array), phy_basic_t1_features); + /* 10 half, P2MP, TP */ + linkmode_set_bit_array(phy_basic_t1s_p2mp_features_array, + ARRAY_SIZE(phy_basic_t1s_p2mp_features_array), + phy_basic_t1s_p2mp_features); + /* 10/100 half/full + 1000 half/full */ linkmode_set_bit_array(phy_basic_ports_array, ARRAY_SIZE(phy_basic_ports_array), @@ -1487,6 +1501,13 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, phydev->interrupts = PHY_INTERRUPT_DISABLED; + /* PHYs can request to use poll mode even though they have an + * associated interrupt line. This could be the case if they + * detect a broken interrupt handling. + */ + if (phydev->dev_flags & PHY_F_NO_IRQ) + phydev->irq = PHY_POLL; + /* Port is set to PORT_TP by default and the actual PHY driver will set * it to different value depending on the PHY configuration. If we have * the generic PHY driver we can't figure it out, thus set the old @@ -3262,6 +3283,9 @@ static const struct ethtool_phy_ops phy_ethtool_phy_ops = { .get_sset_count = phy_ethtool_get_sset_count, .get_strings = phy_ethtool_get_strings, .get_stats = phy_ethtool_get_stats, + .get_plca_cfg = phy_ethtool_get_plca_cfg, + .set_plca_cfg = phy_ethtool_set_plca_cfg, + .get_plca_status = phy_ethtool_get_plca_status, .start_cable_test = phy_start_cable_test, .start_cable_test_tdr = phy_start_cable_test_tdr, }; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 09cc65c0da93..319790221d7f 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -241,12 +241,16 @@ void phylink_caps_to_linkmodes(unsigned long *linkmodes, unsigned long caps) if (caps & MAC_ASYM_PAUSE) __set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, linkmodes); - if (caps & MAC_10HD) + if (caps & MAC_10HD) { __set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, linkmodes); + __set_bit(ETHTOOL_LINK_MODE_10baseT1S_Half_BIT, linkmodes); + __set_bit(ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT, linkmodes); + } if (caps & MAC_10FD) { __set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, linkmodes); __set_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, linkmodes); + __set_bit(ETHTOOL_LINK_MODE_10baseT1S_Full_BIT, linkmodes); } if (caps & MAC_100HD) { diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index c140edb4b648..80849d115e5d 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -747,13 +747,6 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, -/* Realtek RTL8152 Based USB 2.0 Ethernet Adapters */ -{ - USB_DEVICE_AND_INTERFACE_INFO(REALTEK_VENDOR_ID, 0x8152, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, - /* Realtek RTL8153 Based USB 3.0 Ethernet Adapters */ { USB_DEVICE_AND_INTERFACE_INFO(REALTEK_VENDOR_ID, 0x8153, USB_CLASS_COMM, @@ -761,71 +754,6 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, -/* Samsung USB Ethernet Adapters */ -{ - USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, 0xa101, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, - -#if IS_ENABLED(CONFIG_USB_RTL8152) -/* Linksys USB3GIGV1 Ethernet Adapter */ -{ - USB_DEVICE_AND_INTERFACE_INFO(LINKSYS_VENDOR_ID, 0x0041, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, -#endif - -/* Lenovo ThinkPad OneLink+ Dock (based on Realtek RTL8153) */ -{ - USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3054, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, - -/* ThinkPad USB-C Dock (based on Realtek RTL8153) */ -{ - USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3062, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, - -/* ThinkPad Thunderbolt 3 Dock (based on Realtek RTL8153) */ -{ - USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3069, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, - -/* ThinkPad Thunderbolt 3 Dock Gen 2 (based on Realtek RTL8153) */ -{ - USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3082, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, - -/* Lenovo Thinkpad USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ -{ - USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x7205, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, - -/* Lenovo USB C to Ethernet Adapter (based on Realtek RTL8153) */ -{ - USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x720c, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, - -/* Lenovo USB-C Travel Hub (based on Realtek RTL8153) */ -{ - USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x7214, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, - /* Lenovo Powered USB-C Travel Hub (4X90S92381, based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x721e, USB_CLASS_COMM, @@ -833,48 +761,6 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, -/* ThinkPad USB-C Dock Gen 2 (based on Realtek RTL8153) */ -{ - USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0xa387, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, - -/* NVIDIA Tegra USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ -{ - USB_DEVICE_AND_INTERFACE_INFO(NVIDIA_VENDOR_ID, 0x09ff, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, - -/* Microsoft Surface 2 dock (based on Realtek RTL8152) */ -{ - USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x07ab, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, - -/* Microsoft Surface Ethernet Adapter (based on Realtek RTL8153) */ -{ - USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x07c6, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, - -/* Microsoft Surface Ethernet Adapter (based on Realtek RTL8153B) */ -{ - USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x0927, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, - -/* TP-LINK UE300 USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ -{ - USB_DEVICE_AND_INTERFACE_INFO(TPLINK_VENDOR_ID, 0x0601, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = 0, -}, - /* Aquantia AQtion USB to 5GbE Controller (based on AQC111U) */ { USB_DEVICE_AND_INTERFACE_INFO(AQUANTIA_VENDOR_ID, 0xc101, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 23da1d9dafd1..a26d3127a4fe 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9618,6 +9618,9 @@ static int rtl8152_probe(struct usb_interface *intf, if (version == RTL_VER_UNKNOWN) return -ENODEV; + if (intf->cur_altsetting->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC) + return -ENODEV; + if (!rtl_vendor_mode(intf)) return -ENODEV; @@ -9814,43 +9817,35 @@ static void rtl8152_disconnect(struct usb_interface *intf) } } -#define REALTEK_USB_DEVICE(vend, prod) { \ - USB_DEVICE_INTERFACE_CLASS(vend, prod, USB_CLASS_VENDOR_SPEC), \ -}, \ -{ \ - USB_DEVICE_AND_INTERFACE_INFO(vend, prod, USB_CLASS_COMM, \ - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), \ -} - /* table of devices that work with this driver */ static const struct usb_device_id rtl8152_table[] = { /* Realtek */ - REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8050), - REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8053), - REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152), - REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153), - REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8155), - REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8156), + { USB_DEVICE(VENDOR_ID_REALTEK, 0x8050) }, + { USB_DEVICE(VENDOR_ID_REALTEK, 0x8053) }, + { USB_DEVICE(VENDOR_ID_REALTEK, 0x8152) }, + { USB_DEVICE(VENDOR_ID_REALTEK, 0x8153) }, + { USB_DEVICE(VENDOR_ID_REALTEK, 0x8155) }, + { USB_DEVICE(VENDOR_ID_REALTEK, 0x8156) }, /* Microsoft */ - REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab), - REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6), - REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927), - REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0c5e), - REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101), - REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f), - REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3054), - REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3062), - REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3069), - REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3082), - REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205), - REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x720c), - REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7214), - REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x721e), - REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0xa387), - REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041), - REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff), - REALTEK_USB_DEVICE(VENDOR_ID_TPLINK, 0x0601), + { USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab) }, + { USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6) }, + { USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927) }, + { USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0c5e) }, + { USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101) }, + { USB_DEVICE(VENDOR_ID_LENOVO, 0x304f) }, + { USB_DEVICE(VENDOR_ID_LENOVO, 0x3054) }, + { USB_DEVICE(VENDOR_ID_LENOVO, 0x3062) }, + { USB_DEVICE(VENDOR_ID_LENOVO, 0x3069) }, + { USB_DEVICE(VENDOR_ID_LENOVO, 0x3082) }, + { USB_DEVICE(VENDOR_ID_LENOVO, 0x7205) }, + { USB_DEVICE(VENDOR_ID_LENOVO, 0x720c) }, + { USB_DEVICE(VENDOR_ID_LENOVO, 0x7214) }, + { USB_DEVICE(VENDOR_ID_LENOVO, 0x721e) }, + { USB_DEVICE(VENDOR_ID_LENOVO, 0xa387) }, + { USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041) }, + { USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff) }, + { USB_DEVICE(VENDOR_ID_TPLINK, 0x0601) }, {} }; @@ -9870,7 +9865,61 @@ static struct usb_driver rtl8152_driver = { .disable_hub_initiated_lpm = 1, }; -module_usb_driver(rtl8152_driver); +static int rtl8152_cfgselector_probe(struct usb_device *udev) +{ + struct usb_host_config *c; + int i, num_configs; + + /* The vendor mode is not always config #1, so to find it out. */ + c = udev->config; + num_configs = udev->descriptor.bNumConfigurations; + for (i = 0; i < num_configs; (i++, c++)) { + struct usb_interface_descriptor *desc = NULL; + + if (!c->desc.bNumInterfaces) + continue; + desc = &c->intf_cache[0]->altsetting->desc; + if (desc->bInterfaceClass == USB_CLASS_VENDOR_SPEC) + break; + } + + if (i == num_configs) + return -ENODEV; + + if (usb_set_configuration(udev, c->desc.bConfigurationValue)) { + dev_err(&udev->dev, "Failed to set configuration %d\n", + c->desc.bConfigurationValue); + return -ENODEV; + } + + return 0; +} + +static struct usb_device_driver rtl8152_cfgselector_driver = { + .name = MODULENAME "-cfgselector", + .probe = rtl8152_cfgselector_probe, + .id_table = rtl8152_table, + .generic_subclass = 1, +}; + +static int __init rtl8152_driver_init(void) +{ + int ret; + + ret = usb_register_device_driver(&rtl8152_cfgselector_driver, THIS_MODULE); + if (ret) + return ret; + return usb_register(&rtl8152_driver); +} + +static void __exit rtl8152_driver_exit(void) +{ + usb_deregister(&rtl8152_driver); + usb_deregister_device_driver(&rtl8152_cfgselector_driver); +} + +module_init(rtl8152_driver_init); +module_exit(rtl8152_driver_exit); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 64a9a80b2309..283ffddda821 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -555,32 +555,30 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags) /*-------------------------------------------------------------------------*/ -static inline void rx_process (struct usbnet *dev, struct sk_buff *skb) +static inline int rx_process(struct usbnet *dev, struct sk_buff *skb) { if (dev->driver_info->rx_fixup && !dev->driver_info->rx_fixup (dev, skb)) { /* With RX_ASSEMBLE, rx_fixup() must update counters */ if (!(dev->driver_info->flags & FLAG_RX_ASSEMBLE)) dev->net->stats.rx_errors++; - goto done; + return -EPROTO; } // else network stack removes extra byte if we forced a short packet /* all data was already cloned from skb inside the driver */ if (dev->driver_info->flags & FLAG_MULTI_PACKET) - goto done; + return -EALREADY; if (skb->len < ETH_HLEN) { dev->net->stats.rx_errors++; dev->net->stats.rx_length_errors++; netif_dbg(dev, rx_err, dev->net, "rx length %d\n", skb->len); - } else { - usbnet_skb_return(dev, skb); - return; + return -EPROTO; } -done: - skb_queue_tail(&dev->done, skb); + usbnet_skb_return(dev, skb); + return 0; } /*-------------------------------------------------------------------------*/ @@ -1514,6 +1512,14 @@ err: return ret; } +static inline void usb_free_skb(struct sk_buff *skb) +{ + struct skb_data *entry = (struct skb_data *)skb->cb; + + usb_free_urb(entry->urb); + dev_kfree_skb(skb); +} + /*-------------------------------------------------------------------------*/ // tasklet (work deferred from completions, in_irq) or timer @@ -1528,15 +1534,14 @@ static void usbnet_bh (struct timer_list *t) entry = (struct skb_data *) skb->cb; switch (entry->state) { case rx_done: - entry->state = rx_cleanup; - rx_process (dev, skb); + if (rx_process(dev, skb)) + usb_free_skb(skb); continue; case tx_done: kfree(entry->urb->sg); fallthrough; case rx_cleanup: - usb_free_urb (entry->urb); - dev_kfree_skb (skb); + usb_free_skb(skb); continue; default: netdev_dbg(dev->net, "bogus skb state %d\n", entry->state); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3de24cfb7a3d..1697bd87fc06 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -189,7 +189,7 @@ struct btf_field_kptr { u32 btf_id; }; -struct btf_field_list_head { +struct btf_field_graph_root { struct btf *btf; u32 value_btf_id; u32 node_offset; @@ -201,7 +201,7 @@ struct btf_field { enum btf_field_type type; union { struct btf_field_kptr kptr; - struct btf_field_list_head list_head; + struct btf_field_graph_root graph_root; }; }; @@ -2795,10 +2795,18 @@ struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 +#define MAX_BPRINTF_BUF 1024 + +struct bpf_bprintf_data { + u32 *bin_args; + char *buf; + bool get_bin_args; + bool get_buf; +}; int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, - u32 **bin_buf, u32 num_args); -void bpf_bprintf_cleanup(void); + u32 num_args, struct bpf_bprintf_data *data); +void bpf_bprintf_cleanup(struct bpf_bprintf_data *data); /* the implementation of the opaque uapi struct bpf_dynptr */ struct bpf_dynptr_kern { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 53d175cbaa02..127058cfec47 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -92,6 +92,26 @@ struct bpf_reg_state { u32 subprogno; /* for PTR_TO_FUNC */ }; + /* For scalar types (SCALAR_VALUE), this represents our knowledge of + * the actual value. + * For pointer types, this represents the variable part of the offset + * from the pointed-to object, and is shared with all bpf_reg_states + * with the same id as us. + */ + struct tnum var_off; + /* Used to determine if any memory access using this register will + * result in a bad access. + * These refer to the same value as var_off, not necessarily the actual + * contents of the register. + */ + s64 smin_value; /* minimum possible (s64)value */ + s64 smax_value; /* maximum possible (s64)value */ + u64 umin_value; /* minimum possible (u64)value */ + u64 umax_value; /* maximum possible (u64)value */ + s32 s32_min_value; /* minimum possible (s32)value */ + s32 s32_max_value; /* maximum possible (s32)value */ + u32 u32_min_value; /* minimum possible (u32)value */ + u32 u32_max_value; /* maximum possible (u32)value */ /* For PTR_TO_PACKET, used to find other pointers with the same variable * offset, so they can share range knowledge. * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we @@ -144,26 +164,6 @@ struct bpf_reg_state { * allowed and has the same effect as bpf_sk_release(sk). */ u32 ref_obj_id; - /* For scalar types (SCALAR_VALUE), this represents our knowledge of - * the actual value. - * For pointer types, this represents the variable part of the offset - * from the pointed-to object, and is shared with all bpf_reg_states - * with the same id as us. - */ - struct tnum var_off; - /* Used to determine if any memory access using this register will - * result in a bad access. - * These refer to the same value as var_off, not necessarily the actual - * contents of the register. - */ - s64 smin_value; /* minimum possible (s64)value */ - s64 smax_value; /* maximum possible (s64)value */ - u64 umin_value; /* minimum possible (u64)value */ - u64 umax_value; /* maximum possible (u64)value */ - s32 s32_min_value; /* minimum possible (s32)value */ - s32 s32_max_value; /* maximum possible (s32)value */ - u32 u32_min_value; /* minimum possible (u32)value */ - u32 u32_max_value; /* maximum possible (u32)value */ /* parentage chain for liveness checking */ struct bpf_reg_state *parent; /* Inside the callee two registers can be both PTR_TO_STACK like diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9e0a76fc7de9..d0da303f6634 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -802,12 +802,17 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, struct phy_device; struct phy_tdr_config; +struct phy_plca_cfg; +struct phy_plca_status; /** * struct ethtool_phy_ops - Optional PHY device options * @get_sset_count: Get number of strings that @get_strings will write. * @get_strings: Return a set of strings that describe the requested objects * @get_stats: Return extended statistics about the PHY device. + * @get_plca_cfg: Return PLCA configuration. + * @set_plca_cfg: Set PLCA configuration. + * @get_plca_status: Get PLCA configuration. * @start_cable_test: Start a cable test * @start_cable_test_tdr: Start a Time Domain Reflectometry cable test * @@ -819,6 +824,13 @@ struct ethtool_phy_ops { int (*get_strings)(struct phy_device *dev, u8 *data); int (*get_stats)(struct phy_device *dev, struct ethtool_stats *stats, u64 *data); + int (*get_plca_cfg)(struct phy_device *dev, + struct phy_plca_cfg *plca_cfg); + int (*set_plca_cfg)(struct phy_device *dev, + const struct phy_plca_cfg *plca_cfg, + struct netlink_ext_ack *extack); + int (*get_plca_status)(struct phy_device *dev, + struct phy_plca_status *plca_st); int (*start_cable_test)(struct phy_device *phydev, struct netlink_ext_ack *extack); int (*start_cable_test_tdr)(struct phy_device *phydev, diff --git a/include/linux/mdio-bitbang.h b/include/linux/mdio-bitbang.h index 373630fe5c28..cffabdbce075 100644 --- a/include/linux/mdio-bitbang.h +++ b/include/linux/mdio-bitbang.h @@ -38,8 +38,10 @@ struct mdiobb_ctrl { u8 op_c22_write; }; -int mdiobb_read(struct mii_bus *bus, int phy, int reg); -int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val); +int mdiobb_read_c22(struct mii_bus *bus, int phy, int reg); +int mdiobb_write_c22(struct mii_bus *bus, int phy, int reg, u16 val); +int mdiobb_read_c45(struct mii_bus *bus, int devad, int phy, int reg); +int mdiobb_write_c45(struct mii_bus *bus, int devad, int phy, int reg, u16 val); /* The returned bus is not yet registered with the phy layer. */ struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl); diff --git a/include/linux/mdio.h b/include/linux/mdio.h index f7fbbf3069e7..220f3ca8702d 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -423,6 +423,21 @@ int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask, u16 set); int mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, u16 mask, u16 set); +int __mdiobus_c45_read(struct mii_bus *bus, int addr, int devad, u32 regnum); +int mdiobus_c45_read(struct mii_bus *bus, int addr, int devad, u32 regnum); +int mdiobus_c45_read_nested(struct mii_bus *bus, int addr, int devad, + u32 regnum); +int __mdiobus_c45_write(struct mii_bus *bus, int addr, int devad, u32 regnum, + u16 val); +int mdiobus_c45_write(struct mii_bus *bus, int addr, int devad, u32 regnum, + u16 val); +int mdiobus_c45_write_nested(struct mii_bus *bus, int addr, int devad, + u32 regnum, u16 val); +int mdiobus_c45_modify(struct mii_bus *bus, int addr, int devad, u32 regnum, + u16 mask, u16 set); + +int mdiobus_c45_modify_changed(struct mii_bus *bus, int addr, int devad, + u32 regnum, u16 mask, u16 set); static inline int mdiodev_read(struct mdio_device *mdiodev, u32 regnum) { @@ -448,11 +463,6 @@ static inline int mdiodev_modify_changed(struct mdio_device *mdiodev, mask, set); } -static inline u32 mdiobus_c45_addr(int devad, u16 regnum) -{ - return MII_ADDR_C45 | devad << MII_DEVADDR_C45_SHIFT | regnum; -} - static inline u16 mdiobus_c45_regad(u32 regnum) { return FIELD_GET(MII_REGADDR_C45_MASK, regnum); @@ -463,29 +473,19 @@ static inline u16 mdiobus_c45_devad(u32 regnum) return FIELD_GET(MII_DEVADDR_C45_MASK, regnum); } -static inline int __mdiobus_c45_read(struct mii_bus *bus, int prtad, int devad, - u16 regnum) -{ - return __mdiobus_read(bus, prtad, mdiobus_c45_addr(devad, regnum)); -} - -static inline int __mdiobus_c45_write(struct mii_bus *bus, int prtad, int devad, - u16 regnum, u16 val) +static inline int mdiodev_c45_modify(struct mdio_device *mdiodev, int devad, + u32 regnum, u16 mask, u16 set) { - return __mdiobus_write(bus, prtad, mdiobus_c45_addr(devad, regnum), - val); + return mdiobus_c45_modify(mdiodev->bus, mdiodev->addr, devad, regnum, + mask, set); } -static inline int mdiobus_c45_read(struct mii_bus *bus, int prtad, int devad, - u16 regnum) -{ - return mdiobus_read(bus, prtad, mdiobus_c45_addr(devad, regnum)); -} - -static inline int mdiobus_c45_write(struct mii_bus *bus, int prtad, int devad, - u16 regnum, u16 val) +static inline int mdiodev_c45_modify_changed(struct mdio_device *mdiodev, + int devad, u32 regnum, u16 mask, + u16 set) { - return mdiobus_write(bus, prtad, mdiobus_c45_addr(devad, regnum), val); + return mdiobus_c45_modify_changed(mdiodev->bus, mdiodev->addr, devad, + regnum, mask, set); } static inline int mdiodev_c45_read(struct mdio_device *mdiodev, int devad, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 76ef2e4fde38..b957b8f22a6b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -100,6 +100,8 @@ enum { }; enum { + MLX5_REG_SBPR = 0xb001, + MLX5_REG_SBCM = 0xb002, MLX5_REG_QPTS = 0x4002, MLX5_REG_QETCR = 0x4005, MLX5_REG_QTCT = 0x400a, @@ -308,6 +310,7 @@ struct mlx5_cmd { struct workqueue_struct *wq; struct semaphore sem; struct semaphore pages_sem; + struct semaphore throttle_sem; int mode; u16 allowed_opcode; struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; @@ -1200,6 +1203,11 @@ static inline bool mlx5_core_is_vf(const struct mlx5_core_dev *dev) return dev->coredev_type == MLX5_COREDEV_VF; } +static inline bool mlx5_core_is_management_pf(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, num_ports) == 1 && !MLX5_CAP_GEN(dev, native_port_num); +} + static inline bool mlx5_core_is_ecpf(const struct mlx5_core_dev *dev) { return dev->caps.embedded_cpu; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a9ee7bc59c90..a84bdeeed2c6 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -11000,6 +11000,67 @@ struct mlx5_ifc_pbmc_reg_bits { u8 reserved_at_2e0[0x80]; }; +struct mlx5_ifc_sbpr_reg_bits { + u8 desc[0x1]; + u8 snap[0x1]; + u8 reserved_at_2[0x4]; + u8 dir[0x2]; + u8 reserved_at_8[0x14]; + u8 pool[0x4]; + + u8 infi_size[0x1]; + u8 reserved_at_21[0x7]; + u8 size[0x18]; + + u8 reserved_at_40[0x1c]; + u8 mode[0x4]; + + u8 reserved_at_60[0x8]; + u8 buff_occupancy[0x18]; + + u8 clr[0x1]; + u8 reserved_at_81[0x7]; + u8 max_buff_occupancy[0x18]; + + u8 reserved_at_a0[0x8]; + u8 ext_buff_occupancy[0x18]; +}; + +struct mlx5_ifc_sbcm_reg_bits { + u8 desc[0x1]; + u8 snap[0x1]; + u8 reserved_at_2[0x6]; + u8 local_port[0x8]; + u8 pnat[0x2]; + u8 pg_buff[0x6]; + u8 reserved_at_18[0x6]; + u8 dir[0x2]; + + u8 reserved_at_20[0x1f]; + u8 exc[0x1]; + + u8 reserved_at_40[0x40]; + + u8 reserved_at_80[0x8]; + u8 buff_occupancy[0x18]; + + u8 clr[0x1]; + u8 reserved_at_a1[0x7]; + u8 max_buff_occupancy[0x18]; + + u8 reserved_at_c0[0x8]; + u8 min_buff[0x18]; + + u8 infi_max[0x1]; + u8 reserved_at_e1[0x7]; + u8 max_buff[0x18]; + + u8 reserved_at_100[0x20]; + + u8 reserved_at_120[0x1c]; + u8 pool[0x4]; +}; + struct mlx5_ifc_qtct_reg_bits { u8 reserved_at_0[0x8]; u8 port_number[0x8]; diff --git a/include/linux/netlink.h b/include/linux/netlink.h index d81bde5a5844..38f6334f408c 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -263,6 +263,10 @@ struct netlink_callback { }; }; +#define NL_ASSET_DUMP_CTX_FITS(type_name) \ + BUILD_BUG_ON(sizeof(type_name) > \ + sizeof_field(struct netlink_callback, ctx)) + struct netlink_notify { struct net *net; u32 portid; diff --git a/include/linux/phy.h b/include/linux/phy.h index 6378c997ded5..b3cf1e08e880 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -45,6 +45,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1s_p2mp_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_fibre_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_all_ports_features) __ro_after_init; @@ -54,6 +55,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_ini #define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features) #define PHY_BASIC_T1_FEATURES ((unsigned long *)&phy_basic_t1_features) +#define PHY_BASIC_T1S_P2MP_FEATURES ((unsigned long *)&phy_basic_t1s_p2mp_features) #define PHY_GBIT_FEATURES ((unsigned long *)&phy_gbit_features) #define PHY_GBIT_FIBRE_FEATURES ((unsigned long *)&phy_gbit_fibre_features) #define PHY_GBIT_ALL_PORTS_FEATURES ((unsigned long *)&phy_gbit_all_ports_features) @@ -66,6 +68,7 @@ extern const int phy_fibre_port_array[1]; extern const int phy_all_ports_features_array[7]; extern const int phy_10_100_features_array[4]; extern const int phy_basic_t1_features_array[3]; +extern const int phy_basic_t1s_p2mp_features_array[2]; extern const int phy_gbit_features_array[2]; extern const int phy_10gbit_features_array[1]; @@ -364,6 +367,11 @@ struct mii_bus { int (*read)(struct mii_bus *bus, int addr, int regnum); /** @write: Perform a write transfer on the bus */ int (*write)(struct mii_bus *bus, int addr, int regnum, u16 val); + /** @read_c45: Perform a C45 read transfer on the bus */ + int (*read_c45)(struct mii_bus *bus, int addr, int devnum, int regnum); + /** @write_c45: Perform a C45 write transfer on the bus */ + int (*write_c45)(struct mii_bus *bus, int addr, int devnum, + int regnum, u16 val); /** @reset: Perform a reset of the bus */ int (*reset)(struct mii_bus *bus); @@ -739,6 +747,9 @@ struct phy_device { #endif }; +/* Generic phy_device::dev_flags */ +#define PHY_F_NO_IRQ 0x80000000 + static inline struct phy_device *to_phy_device(const struct device *dev) { return container_of(to_mdio_device(dev), struct phy_device, mdio); @@ -766,6 +777,63 @@ struct phy_tdr_config { #define PHY_PAIR_ALL -1 /** + * struct phy_plca_cfg - Configuration of the PLCA (Physical Layer Collision + * Avoidance) Reconciliation Sublayer. + * + * @version: read-only PLCA register map version. -1 = not available. Ignored + * when setting the configuration. Format is the same as reported by the PLCA + * IDVER register (31.CA00). -1 = not available. + * @enabled: PLCA configured mode (enabled/disabled). -1 = not available / don't + * set. 0 = disabled, anything else = enabled. + * @node_id: the PLCA local node identifier. -1 = not available / don't set. + * Allowed values [0 .. 254]. 255 = node disabled. + * @node_cnt: the PLCA node count (maximum number of nodes having a TO). Only + * meaningful for the coordinator (node_id = 0). -1 = not available / don't + * set. Allowed values [1 .. 255]. + * @to_tmr: The value of the PLCA to_timer in bit-times, which determines the + * PLCA transmit opportunity window opening. See IEEE802.3 Clause 148 for + * more details. The to_timer shall be set equal over all nodes. + * -1 = not available / don't set. Allowed values [0 .. 255]. + * @burst_cnt: controls how many additional frames a node is allowed to send in + * single transmit opportunity (TO). The default value of 0 means that the + * node is allowed exactly one frame per TO. A value of 1 allows two frames + * per TO, and so on. -1 = not available / don't set. + * Allowed values [0 .. 255]. + * @burst_tmr: controls how many bit times to wait for the MAC to send a new + * frame before interrupting the burst. This value should be set to a value + * greater than the MAC inter-packet gap (which is typically 96 bits). + * -1 = not available / don't set. Allowed values [0 .. 255]. + * + * A structure containing configuration parameters for setting/getting the PLCA + * RS configuration. The driver does not need to implement all the parameters, + * but should report what is actually used. + */ +struct phy_plca_cfg { + int version; + int enabled; + int node_id; + int node_cnt; + int to_tmr; + int burst_cnt; + int burst_tmr; +}; + +/** + * struct phy_plca_status - Status of the PLCA (Physical Layer Collision + * Avoidance) Reconciliation Sublayer. + * + * @pst: The PLCA status as reported by the PST bit in the PLCA STATUS + * register(31.CA03), indicating BEACON activity. + * + * A structure containing status information of the PLCA RS configuration. + * The driver does not need to implement all the parameters, but should report + * what is actually used. + */ +struct phy_plca_status { + bool pst; +}; + +/** * struct phy_driver - Driver structure for a particular PHY type * * @mdiodrv: Data common to all MDIO devices @@ -976,6 +1044,17 @@ struct phy_driver { int (*get_sqi)(struct phy_device *dev); /** @get_sqi_max: Get the maximum signal quality indication */ int (*get_sqi_max)(struct phy_device *dev); + + /* PLCA RS interface */ + /** @get_plca_cfg: Return the current PLCA configuration */ + int (*get_plca_cfg)(struct phy_device *dev, + struct phy_plca_cfg *plca_cfg); + /** @set_plca_cfg: Set the PLCA configuration */ + int (*set_plca_cfg)(struct phy_device *dev, + const struct phy_plca_cfg *plca_cfg); + /** @get_plca_status: Return the current PLCA status info */ + int (*get_plca_status)(struct phy_device *dev, + struct phy_plca_status *plca_st); }; #define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) @@ -1674,6 +1753,12 @@ int genphy_c45_loopback(struct phy_device *phydev, bool enable); int genphy_c45_pma_resume(struct phy_device *phydev); int genphy_c45_pma_suspend(struct phy_device *phydev); int genphy_c45_fast_retrain(struct phy_device *phydev, bool enable); +int genphy_c45_plca_get_cfg(struct phy_device *phydev, + struct phy_plca_cfg *plca_cfg); +int genphy_c45_plca_set_cfg(struct phy_device *phydev, + const struct phy_plca_cfg *plca_cfg); +int genphy_c45_plca_get_status(struct phy_device *phydev, + struct phy_plca_status *plca_st); /* Generic C45 PHY driver */ extern struct phy_driver genphy_c45_driver; @@ -1772,6 +1857,13 @@ int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data); int phy_ethtool_get_sset_count(struct phy_device *phydev); int phy_ethtool_get_stats(struct phy_device *phydev, struct ethtool_stats *stats, u64 *data); +int phy_ethtool_get_plca_cfg(struct phy_device *phydev, + struct phy_plca_cfg *plca_cfg); +int phy_ethtool_set_plca_cfg(struct phy_device *phydev, + const struct phy_plca_cfg *plca_cfg, + struct netlink_ext_ack *extack); +int phy_ethtool_get_plca_status(struct phy_device *phydev, + struct phy_plca_status *plca_st); static inline int phy_package_read(struct phy_device *phydev, u32 regnum) { diff --git a/include/net/devlink.h b/include/net/devlink.h index 6a2e4f21779f..425ecef431b7 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1647,6 +1647,8 @@ static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, return devlink_alloc_ns(ops, priv_size, &init_net, dev); } void devlink_set_features(struct devlink *devlink, u64 features); +int devl_register(struct devlink *devlink); +void devl_unregister(struct devlink *devlink); void devlink_register(struct devlink *devlink); void devlink_unregister(struct devlink *devlink); void devlink_free(struct devlink *devlink); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 464ca3f01fe7..bc1a3d232ae4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2001,6 +2001,9 @@ union bpf_attr { * sending the packet. This flag was added for GRE * encapsulation, but might be used with other protocols * as well in the future. + * **BPF_F_NO_TUNNEL_KEY** + * Add a flag to tunnel metadata indicating that no tunnel + * key should be set in the resulting tunnel header. * * Here is a typical usage on the transmit path: * @@ -5764,6 +5767,7 @@ enum { BPF_F_ZERO_CSUM_TX = (1ULL << 1), BPF_F_DONT_FRAGMENT = (1ULL << 2), BPF_F_SEQ_NUMBER = (1ULL << 3), + BPF_F_NO_TUNNEL_KEY = (1ULL << 4), }; /* BPF_FUNC_skb_get_tunnel_key flags. */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 58e587ba0450..6389953c32cf 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1183,7 +1183,7 @@ struct ethtool_rxnfc { __u32 rule_cnt; __u32 rss_context; }; - __u32 rule_locs[0]; + __u32 rule_locs[]; }; @@ -1741,6 +1741,9 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT = 96, ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT = 97, ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT = 98, + ETHTOOL_LINK_MODE_10baseT1S_Full_BIT = 99, + ETHTOOL_LINK_MODE_10baseT1S_Half_BIT = 100, + ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT = 101, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 5799a9db034e..75b3d6d476ff 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -52,6 +52,9 @@ enum { ETHTOOL_MSG_PSE_GET, ETHTOOL_MSG_PSE_SET, ETHTOOL_MSG_RSS_GET, + ETHTOOL_MSG_PLCA_GET_CFG, + ETHTOOL_MSG_PLCA_SET_CFG, + ETHTOOL_MSG_PLCA_GET_STATUS, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -99,6 +102,9 @@ enum { ETHTOOL_MSG_MODULE_NTF, ETHTOOL_MSG_PSE_GET_REPLY, ETHTOOL_MSG_RSS_GET_REPLY, + ETHTOOL_MSG_PLCA_GET_CFG_REPLY, + ETHTOOL_MSG_PLCA_GET_STATUS_REPLY, + ETHTOOL_MSG_PLCA_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -894,6 +900,25 @@ enum { ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), }; +/* PLCA */ + +enum { + ETHTOOL_A_PLCA_UNSPEC, + ETHTOOL_A_PLCA_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PLCA_VERSION, /* u16 */ + ETHTOOL_A_PLCA_ENABLED, /* u8 */ + ETHTOOL_A_PLCA_STATUS, /* u8 */ + ETHTOOL_A_PLCA_NODE_CNT, /* u32 */ + ETHTOOL_A_PLCA_NODE_ID, /* u32 */ + ETHTOOL_A_PLCA_TO_TMR, /* u32 */ + ETHTOOL_A_PLCA_BURST_CNT, /* u32 */ + ETHTOOL_A_PLCA_BURST_TMR, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_PLCA_CNT, + ETHTOOL_A_PLCA_MAX = (__ETHTOOL_A_PLCA_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/include/uapi/linux/ioam6.h b/include/uapi/linux/ioam6.h index ac4de376f0ce..8f72b24fefb3 100644 --- a/include/uapi/linux/ioam6.h +++ b/include/uapi/linux/ioam6.h @@ -127,7 +127,7 @@ struct ioam6_trace_hdr { #endif #define IOAM6_TRACE_DATA_SIZE_MAX 244 - __u8 data[0]; + __u8 data[]; } __attribute__((packed)); #endif /* _UAPI_LINUX_IOAM6_H */ diff --git a/include/uapi/linux/rpl.h b/include/uapi/linux/rpl.h index 708adddf9f13..7c8970e5b84b 100644 --- a/include/uapi/linux/rpl.h +++ b/include/uapi/linux/rpl.h @@ -37,8 +37,8 @@ struct ipv6_rpl_sr_hdr { #endif union { - struct in6_addr addr[0]; - __u8 data[0]; + __DECLARE_FLEX_ARRAY(struct in6_addr, addr); + __DECLARE_FLEX_ARRAY(__u8, data); } segments; } __attribute__((packed)); diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index b39a46e8fb08..373c3c2c75bc 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -580,8 +580,8 @@ static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_att raw_spin_lock_init(&smap->buckets[i].lock); } - smap->elem_size = - sizeof(struct bpf_local_storage_elem) + attr->value_size; + smap->elem_size = offsetof(struct bpf_local_storage_elem, + sdata.data[attr->value_size]); return smap; } diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index f7dd8af06413..578cee398550 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3228,7 +3228,7 @@ struct btf_field_info { struct { const char *node_name; u32 value_btf_id; - } list_head; + } graph_root; }; }; @@ -3335,8 +3335,8 @@ static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt, return -EINVAL; info->type = BPF_LIST_HEAD; info->off = off; - info->list_head.value_btf_id = id; - info->list_head.node_name = list_node; + info->graph_root.value_btf_id = id; + info->graph_root.node_name = list_node; return BTF_FIELD_FOUND; } @@ -3604,13 +3604,14 @@ static int btf_parse_list_head(const struct btf *btf, struct btf_field *field, u32 offset; int i; - t = btf_type_by_id(btf, info->list_head.value_btf_id); + t = btf_type_by_id(btf, info->graph_root.value_btf_id); /* We've already checked that value_btf_id is a struct type. We * just need to figure out the offset of the list_node, and * verify its type. */ for_each_member(i, t, member) { - if (strcmp(info->list_head.node_name, __btf_name_by_offset(btf, member->name_off))) + if (strcmp(info->graph_root.node_name, + __btf_name_by_offset(btf, member->name_off))) continue; /* Invalid BTF, two members with same name */ if (n) @@ -3627,9 +3628,9 @@ static int btf_parse_list_head(const struct btf *btf, struct btf_field *field, if (offset % __alignof__(struct bpf_list_node)) return -EINVAL; - field->list_head.btf = (struct btf *)btf; - field->list_head.value_btf_id = info->list_head.value_btf_id; - field->list_head.node_offset = offset; + field->graph_root.btf = (struct btf *)btf; + field->graph_root.value_btf_id = info->graph_root.value_btf_id; + field->graph_root.node_offset = offset; } if (!n) return -ENOENT; @@ -3736,11 +3737,11 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) if (!(rec->fields[i].type & BPF_LIST_HEAD)) continue; - btf_id = rec->fields[i].list_head.value_btf_id; + btf_id = rec->fields[i].graph_root.value_btf_id; meta = btf_find_struct_meta(btf, btf_id); if (!meta) return -EFAULT; - rec->fields[i].list_head.value_rec = meta->record; + rec->fields[i].graph_root.value_rec = meta->record; if (!(rec->field_mask & BPF_LIST_NODE)) continue; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index af30c6cbd65d..458db2db2f81 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -756,19 +756,20 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, /* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary * arguments representation. */ -#define MAX_BPRINTF_BUF_LEN 512 +#define MAX_BPRINTF_BIN_ARGS 512 /* Support executing three nested bprintf helper calls on a given CPU */ #define MAX_BPRINTF_NEST_LEVEL 3 struct bpf_bprintf_buffers { - char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN]; + char bin_args[MAX_BPRINTF_BIN_ARGS]; + char buf[MAX_BPRINTF_BUF]; }; -static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs); + +static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs); static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); -static int try_get_fmt_tmp_buf(char **tmp_buf) +static int try_get_buffers(struct bpf_bprintf_buffers **bufs) { - struct bpf_bprintf_buffers *bufs; int nest_level; preempt_disable(); @@ -778,18 +779,19 @@ static int try_get_fmt_tmp_buf(char **tmp_buf) preempt_enable(); return -EBUSY; } - bufs = this_cpu_ptr(&bpf_bprintf_bufs); - *tmp_buf = bufs->tmp_bufs[nest_level - 1]; + *bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]); return 0; } -void bpf_bprintf_cleanup(void) +void bpf_bprintf_cleanup(struct bpf_bprintf_data *data) { - if (this_cpu_read(bpf_bprintf_nest_level)) { - this_cpu_dec(bpf_bprintf_nest_level); - preempt_enable(); - } + if (!data->bin_args && !data->buf) + return; + if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0)) + return; + this_cpu_dec(bpf_bprintf_nest_level); + preempt_enable(); } /* @@ -798,18 +800,20 @@ void bpf_bprintf_cleanup(void) * Returns a negative value if fmt is an invalid format string or 0 otherwise. * * This can be used in two ways: - * - Format string verification only: when bin_args is NULL + * - Format string verification only: when data->get_bin_args is false * - Arguments preparation: in addition to the above verification, it writes in - * bin_args a binary representation of arguments usable by bstr_printf where - * pointers from BPF have been sanitized. + * data->bin_args a binary representation of arguments usable by bstr_printf + * where pointers from BPF have been sanitized. * * In argument preparation mode, if 0 is returned, safe temporary buffers are * allocated and bpf_bprintf_cleanup should be called to free them after use. */ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, - u32 **bin_args, u32 num_args) + u32 num_args, struct bpf_bprintf_data *data) { + bool get_buffers = (data->get_bin_args && num_args) || data->get_buf; char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end; + struct bpf_bprintf_buffers *buffers = NULL; size_t sizeof_cur_arg, sizeof_cur_ip; int err, i, num_spec = 0; u64 cur_arg; @@ -820,14 +824,19 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, return -EINVAL; fmt_size = fmt_end - fmt; - if (bin_args) { - if (num_args && try_get_fmt_tmp_buf(&tmp_buf)) - return -EBUSY; + if (get_buffers && try_get_buffers(&buffers)) + return -EBUSY; - tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN; - *bin_args = (u32 *)tmp_buf; + if (data->get_bin_args) { + if (num_args) + tmp_buf = buffers->bin_args; + tmp_buf_end = tmp_buf + MAX_BPRINTF_BIN_ARGS; + data->bin_args = (u32 *)tmp_buf; } + if (data->get_buf) + data->buf = buffers->buf; + for (i = 0; i < fmt_size; i++) { if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { err = -EINVAL; @@ -1021,31 +1030,33 @@ nocopy_fmt: err = 0; out: if (err) - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(data); return err; } BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt, - const void *, data, u32, data_len) + const void *, args, u32, data_len) { + struct bpf_bprintf_data data = { + .get_bin_args = true, + }; int err, num_args; - u32 *bin_args; if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; /* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we * can safely give an unbounded size. */ - err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args); + err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data); if (err < 0) return err; - err = bstr_printf(str, str_size, fmt, bin_args); + err = bstr_printf(str, str_size, fmt, data.bin_args); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return err + 1; } @@ -1745,12 +1756,12 @@ unlock: while (head != orig_head) { void *obj = head; - obj -= field->list_head.node_offset; + obj -= field->graph_root.node_offset; head = head->next; /* The contained type can also have resources, including a * bpf_list_head which needs to be freed. */ - bpf_obj_free_fields(field->list_head.value_rec, obj); + bpf_obj_free_fields(field->graph_root.value_rec, obj); /* bpf_mem_free requires migrate_disable(), since we can be * called from map free path as well apart from BPF program (as * part of map ops doing bpf_obj_free_fields). diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 64131f88c553..35ffd808f281 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5319,7 +5319,6 @@ static struct ctl_table bpf_syscall_table[] = { { .procname = "bpf_stats_enabled", .data = &bpf_stats_enabled_key.key, - .maxlen = sizeof(bpf_stats_enabled_key), .mode = 0644, .proc_handler = bpf_stats_handler, }, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 85f96c1e9f62..fa4c911603e9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1404,9 +1404,11 @@ static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm) */ static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) { - /* Clear id, off, and union(map_ptr, range) */ + /* Clear off and union(map_ptr, range) */ memset(((u8 *)reg) + sizeof(reg->type), 0, offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type)); + reg->id = 0; + reg->ref_obj_id = 0; ___mark_reg_known(reg, imm); } @@ -1752,11 +1754,13 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env, struct bpf_reg_state *reg) { /* - * Clear type, id, off, and union(map_ptr, range) and + * Clear type, off, and union(map_ptr, range) and * padding between 'type' and union */ memset(reg, 0, offsetof(struct bpf_reg_state, var_off)); reg->type = SCALAR_VALUE; + reg->id = 0; + reg->ref_obj_id = 0; reg->var_off = tnum_unknown; reg->frameno = 0; reg->precise = !env->bpf_capable; @@ -7614,6 +7618,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, struct bpf_reg_state *fmt_reg = ®s[BPF_REG_3]; struct bpf_reg_state *data_len_reg = ®s[BPF_REG_5]; struct bpf_map *fmt_map = fmt_reg->map_ptr; + struct bpf_bprintf_data data = {}; int err, fmt_map_off, num_args; u64 fmt_addr; char *fmt; @@ -7638,7 +7643,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env, /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we * can focus on validating the format specifiers. */ - err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args); + err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data); if (err < 0) verbose(env, "Invalid format string\n"); @@ -8773,21 +8778,22 @@ static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env, field = meta->arg_list_head.field; - et = btf_type_by_id(field->list_head.btf, field->list_head.value_btf_id); + et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id); t = btf_type_by_id(reg->btf, reg->btf_id); - if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->list_head.btf, - field->list_head.value_btf_id, true)) { + if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf, + field->graph_root.value_btf_id, true)) { verbose(env, "operation on bpf_list_head expects arg#1 bpf_list_node at offset=%d " "in struct %s, but arg is at offset=%d in struct %s\n", - field->list_head.node_offset, btf_name_by_offset(field->list_head.btf, et->name_off), + field->graph_root.node_offset, + btf_name_by_offset(field->graph_root.btf, et->name_off), list_node_off, btf_name_by_offset(reg->btf, t->name_off)); return -EINVAL; } - if (list_node_off != field->list_head.node_offset) { + if (list_node_off != field->graph_root.node_offset) { verbose(env, "arg#1 offset=%d, but expected bpf_list_node at offset=%d in struct %s\n", - list_node_off, field->list_head.node_offset, - btf_name_by_offset(field->list_head.btf, et->name_off)); + list_node_off, field->graph_root.node_offset, + btf_name_by_offset(field->graph_root.btf, et->name_off)); return -EINVAL; } /* Set arg#1 for expiration after unlock */ @@ -9229,9 +9235,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; - regs[BPF_REG_0].btf = field->list_head.btf; - regs[BPF_REG_0].btf_id = field->list_head.value_btf_id; - regs[BPF_REG_0].off = field->list_head.node_offset; + regs[BPF_REG_0].btf = field->graph_root.btf; + regs[BPF_REG_0].btf_id = field->graph_root.value_btf_id; + regs[BPF_REG_0].off = field->graph_root.node_offset; } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) { mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED; @@ -12950,6 +12956,13 @@ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap) { unsigned int i; + /* either both IDs should be set or both should be zero */ + if (!!old_id != !!cur_id) + return false; + + if (old_id == 0) /* cur_id == 0 as well */ + return true; + for (i = 0; i < BPF_ID_MAP_SIZE; i++) { if (!idmap[i].old) { /* Reached an empty slot; haven't seen this id before */ @@ -13061,79 +13074,74 @@ next: } } +static bool regs_exact(const struct bpf_reg_state *rold, + const struct bpf_reg_state *rcur, + struct bpf_id_pair *idmap) +{ + return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && + check_ids(rold->id, rcur->id, idmap) && + check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap); +} + /* Returns true if (rold safe implies rcur safe) */ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, struct bpf_reg_state *rcur, struct bpf_id_pair *idmap) { - bool equal; - if (!(rold->live & REG_LIVE_READ)) /* explored state didn't use this */ return true; - - equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0; - if (rold->type == NOT_INIT) /* explored state can't have used this */ return true; if (rcur->type == NOT_INIT) return false; + + /* Enforce that register types have to match exactly, including their + * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general + * rule. + * + * One can make a point that using a pointer register as unbounded + * SCALAR would be technically acceptable, but this could lead to + * pointer leaks because scalars are allowed to leak while pointers + * are not. We could make this safe in special cases if root is + * calling us, but it's probably not worth the hassle. + * + * Also, register types that are *not* MAYBE_NULL could technically be + * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE + * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point + * to the same map). + * However, if the old MAYBE_NULL register then got NULL checked, + * doing so could have affected others with the same id, and we can't + * check for that because we lost the id when we converted to + * a non-MAYBE_NULL variant. + * So, as a general rule we don't allow mixing MAYBE_NULL and + * non-MAYBE_NULL registers as well. + */ + if (rold->type != rcur->type) + return false; + switch (base_type(rold->type)) { case SCALAR_VALUE: - if (equal) + if (regs_exact(rold, rcur, idmap)) return true; if (env->explore_alu_limits) return false; - if (rcur->type == SCALAR_VALUE) { - if (!rold->precise) - return true; - /* new val must satisfy old val knowledge */ - return range_within(rold, rcur) && - tnum_in(rold->var_off, rcur->var_off); - } else { - /* We're trying to use a pointer in place of a scalar. - * Even if the scalar was unbounded, this could lead to - * pointer leaks because scalars are allowed to leak - * while pointers are not. We could make this safe in - * special cases if root is calling us, but it's - * probably not worth the hassle. - */ - return false; - } + if (!rold->precise) + return true; + /* new val must satisfy old val knowledge */ + return range_within(rold, rcur) && + tnum_in(rold->var_off, rcur->var_off); case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: - /* a PTR_TO_MAP_VALUE could be safe to use as a - * PTR_TO_MAP_VALUE_OR_NULL into the same map. - * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- - * checked, doing so could have affected others with the same - * id, and we can't check for that because we lost the id when - * we converted to a PTR_TO_MAP_VALUE. - */ - if (type_may_be_null(rold->type)) { - if (!type_may_be_null(rcur->type)) - return false; - if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) - return false; - /* Check our ids match any regs they're supposed to */ - return check_ids(rold->id, rcur->id, idmap); - } - /* If the new min/max/var_off satisfy the old ones and * everything else matches, we are OK. - * 'id' is not compared, since it's only used for maps with - * bpf_spin_lock inside map element and in such cases if - * the rest of the prog is valid for one map element then - * it's valid for all map elements regardless of the key - * used in bpf_map_lookup() */ - return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && + return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 && range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off) && check_ids(rold->id, rcur->id, idmap); case PTR_TO_PACKET_META: case PTR_TO_PACKET: - if (rcur->type != rold->type) - return false; /* We must have at least as much range as the old ptr * did, so that any accesses which were safe before are * still safe. This is true even if old range < old off, @@ -13148,7 +13156,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, if (rold->off != rcur->off) return false; /* id relations must be preserved */ - if (rold->id && !check_ids(rold->id, rcur->id, idmap)) + if (!check_ids(rold->id, rcur->id, idmap)) return false; /* new val must satisfy old val knowledge */ return range_within(rold, rcur) && @@ -13157,15 +13165,10 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, /* two stack pointers are equal only if they're pointing to * the same stack frame, since fp-8 in foo != fp-8 in bar */ - return equal && rold->frameno == rcur->frameno; + return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno; default: - /* Only valid matches are exact, which memcmp() */ - return equal; + return regs_exact(rold, rcur, idmap); } - - /* Shouldn't get here; if we do, say it's not safe */ - WARN_ON_ONCE(1); - return false; } static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, @@ -13231,12 +13234,20 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, return true; } -static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur) +static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur, + struct bpf_id_pair *idmap) { + int i; + if (old->acquired_refs != cur->acquired_refs) return false; - return !memcmp(old->refs, cur->refs, - sizeof(*old->refs) * old->acquired_refs); + + for (i = 0; i < old->acquired_refs; i++) { + if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap)) + return false; + } + + return true; } /* compare two verifier states @@ -13278,7 +13289,7 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat if (!stacksafe(env, old, cur, env->idmap_scratch)) return false; - if (!refsafe(old, cur)) + if (!refsafe(old, cur, env->idmap_scratch)) return false; return true; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 3bbd3f0c810c..23ce498bca97 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -369,8 +369,6 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) return &bpf_probe_write_user_proto; } -static DEFINE_RAW_SPINLOCK(trace_printk_lock); - #define MAX_TRACE_PRINTK_VARARGS 3 #define BPF_TRACE_PRINTK_SIZE 1024 @@ -378,23 +376,22 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, u64, arg2, u64, arg3) { u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 }; - u32 *bin_args; - static char buf[BPF_TRACE_PRINTK_SIZE]; - unsigned long flags; + struct bpf_bprintf_data data = { + .get_bin_args = true, + .get_buf = true, + }; int ret; - ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args, - MAX_TRACE_PRINTK_VARARGS); + ret = bpf_bprintf_prepare(fmt, fmt_size, args, + MAX_TRACE_PRINTK_VARARGS, &data); if (ret < 0) return ret; - raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args); + ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); - trace_bpf_trace_printk(buf); - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); + trace_bpf_trace_printk(data.buf); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return ret; } @@ -427,30 +424,29 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } -BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data, +BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args, u32, data_len) { - static char buf[BPF_TRACE_PRINTK_SIZE]; - unsigned long flags; + struct bpf_bprintf_data data = { + .get_bin_args = true, + .get_buf = true, + }; int ret, num_args; - u32 *bin_args; if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; - ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args); + ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); if (ret < 0) return ret; - raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args); + ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); - trace_bpf_trace_printk(buf); - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); + trace_bpf_trace_printk(data.buf); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return ret; } @@ -472,23 +468,25 @@ const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void) } BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, - const void *, data, u32, data_len) + const void *, args, u32, data_len) { + struct bpf_bprintf_data data = { + .get_bin_args = true, + }; int err, num_args; - u32 *bin_args; if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; - err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args); + err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); if (err < 0) return err; - seq_bprintf(m, fmt, bin_args); + seq_bprintf(m, fmt, data.bin_args); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return seq_has_overflowed(m) ? -EOVERFLOW : 0; } diff --git a/net/Makefile b/net/Makefile index 6a62e5b27378..0914bea9c335 100644 --- a/net/Makefile +++ b/net/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ +obj-$(CONFIG_NET_DEVLINK) += devlink/ obj-$(CONFIG_NET_DSA) += dsa/ obj-$(CONFIG_ATALK) += appletalk/ obj-$(CONFIG_X25) += x25/ diff --git a/net/core/Makefile b/net/core/Makefile index 5857cec87b83..10edd66a8a37 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -33,7 +33,6 @@ obj-$(CONFIG_LWTUNNEL) += lwtunnel.o obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o obj-$(CONFIG_DST_CACHE) += dst_cache.o obj-$(CONFIG_HWBM) += hwbm.o -obj-$(CONFIG_NET_DEVLINK) += devlink.o obj-$(CONFIG_GRO_CELLS) += gro_cells.o obj-$(CONFIG_FAILOVER) += failover.o obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o diff --git a/net/core/dev.c b/net/core/dev.c index b76fb37b381e..cf78f35bc0b9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1840,7 +1840,7 @@ EXPORT_SYMBOL(register_netdevice_notifier_net); * @nb: notifier * * Unregister a notifier previously registered by - * register_netdevice_notifier(). The notifier is unlinked into the + * register_netdevice_notifier_net(). The notifier is unlinked from the * kernel structures and may then be reused. A negative errno code * is returned on a failure. * diff --git a/net/core/filter.c b/net/core/filter.c index 43cc1fe58a2c..ab811293ae5d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4618,7 +4618,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, struct ip_tunnel_info *info; if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX | - BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER))) + BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER | + BPF_F_NO_TUNNEL_KEY))) return -EINVAL; if (unlikely(size != sizeof(struct bpf_tunnel_key))) { switch (size) { @@ -4656,6 +4657,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, info->key.tun_flags &= ~TUNNEL_CSUM; if (flags & BPF_F_SEQ_NUMBER) info->key.tun_flags |= TUNNEL_SEQ; + if (flags & BPF_F_NO_TUNNEL_KEY) + info->key.tun_flags &= ~TUNNEL_KEY; info->key.tun_id = cpu_to_be64(from->tunnel_id); info->key.tos = from->tunnel_tos; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4a0eb5593275..3a10387f9434 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -386,8 +386,6 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size) /* build_skb() is wrapper over __build_skb(), that specifically * takes care of skb->head and skb->pfmemalloc - * This means that if @frag_size is not zero, then @data must be backed - * by a page fragment, not kmalloc() or vmalloc() */ struct sk_buff *build_skb(void *data, unsigned int frag_size) { @@ -406,7 +404,7 @@ EXPORT_SYMBOL(build_skb); * build_skb_around - build a network buffer around provided skb * @skb: sk_buff provide by caller, must be memset cleared * @data: data buffer provided by caller - * @frag_size: size of data, or 0 if head was kmalloced + * @frag_size: size of data */ struct sk_buff *build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size) @@ -428,7 +426,7 @@ EXPORT_SYMBOL(build_skb_around); /** * __napi_build_skb - build a network buffer * @data: data buffer provided by caller - * @frag_size: size of data, or 0 if head was kmalloced + * @frag_size: size of data * * Version of __build_skb() that uses NAPI percpu caches to obtain * skbuff_head instead of inplace allocation. @@ -452,7 +450,7 @@ static struct sk_buff *__napi_build_skb(void *data, unsigned int frag_size) /** * napi_build_skb - build a network buffer * @data: data buffer provided by caller - * @frag_size: size of data, or 0 if head was kmalloced + * @frag_size: size of data * * Version of __napi_build_skb() that takes care of skb->head_frag * and skb->pfmemalloc when the data is a page or page fragment. diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 5b1ce656baa1..e7b98162c632 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -643,11 +643,6 @@ static __net_init int sysctl_core_net_init(struct net *net) for (tmp = tbl; tmp->procname; tmp++) tmp->data += (char *)net - (char *)&init_net; - - /* Don't export any sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) { - tbl[0].procname = NULL; - } } net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl); diff --git a/net/devlink/Makefile b/net/devlink/Makefile new file mode 100644 index 000000000000..1b1eeac59cb3 --- /dev/null +++ b/net/devlink/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y := leftover.o core.o netlink.o diff --git a/net/devlink/core.c b/net/devlink/core.c new file mode 100644 index 000000000000..60beca2df7cc --- /dev/null +++ b/net/devlink/core.c @@ -0,0 +1,344 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> + */ + +#include <net/genetlink.h> + +#include "devl_internal.h" + +DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); + +void *devlink_priv(struct devlink *devlink) +{ + return &devlink->priv; +} +EXPORT_SYMBOL_GPL(devlink_priv); + +struct devlink *priv_to_devlink(void *priv) +{ + return container_of(priv, struct devlink, priv); +} +EXPORT_SYMBOL_GPL(priv_to_devlink); + +struct device *devlink_to_dev(const struct devlink *devlink) +{ + return devlink->dev; +} +EXPORT_SYMBOL_GPL(devlink_to_dev); + +struct net *devlink_net(const struct devlink *devlink) +{ + return read_pnet(&devlink->_net); +} +EXPORT_SYMBOL_GPL(devlink_net); + +void devl_assert_locked(struct devlink *devlink) +{ + lockdep_assert_held(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devl_assert_locked); + +#ifdef CONFIG_LOCKDEP +/* For use in conjunction with LOCKDEP only e.g. rcu_dereference_protected() */ +bool devl_lock_is_held(struct devlink *devlink) +{ + return lockdep_is_held(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devl_lock_is_held); +#endif + +void devl_lock(struct devlink *devlink) +{ + mutex_lock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devl_lock); + +int devl_trylock(struct devlink *devlink) +{ + return mutex_trylock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devl_trylock); + +void devl_unlock(struct devlink *devlink) +{ + mutex_unlock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devl_unlock); + +/** + * devlink_try_get() - try to obtain a reference on a devlink instance + * @devlink: instance to reference + * + * Obtain a reference on a devlink instance. A reference on a devlink instance + * only implies that it's safe to take the instance lock. It does not imply + * that the instance is registered, use devl_is_registered() after taking + * the instance lock to check registration status. + */ +struct devlink *__must_check devlink_try_get(struct devlink *devlink) +{ + if (refcount_inc_not_zero(&devlink->refcount)) + return devlink; + return NULL; +} + +static void devlink_release(struct work_struct *work) +{ + struct devlink *devlink; + + devlink = container_of(to_rcu_work(work), struct devlink, rwork); + + mutex_destroy(&devlink->lock); + lockdep_unregister_key(&devlink->lock_key); + kfree(devlink); +} + +void devlink_put(struct devlink *devlink) +{ + if (refcount_dec_and_test(&devlink->refcount)) + queue_rcu_work(system_wq, &devlink->rwork); +} + +struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp) +{ + struct devlink *devlink = NULL; + + rcu_read_lock(); +retry: + devlink = xa_find(&devlinks, indexp, ULONG_MAX, DEVLINK_REGISTERED); + if (!devlink) + goto unlock; + + if (!devlink_try_get(devlink)) + goto next; + if (!net_eq(devlink_net(devlink), net)) { + devlink_put(devlink); + goto next; + } +unlock: + rcu_read_unlock(); + return devlink; + +next: + (*indexp)++; + goto retry; +} + +/** + * devlink_set_features - Set devlink supported features + * + * @devlink: devlink + * @features: devlink support features + * + * This interface allows us to set reload ops separatelly from + * the devlink_alloc. + */ +void devlink_set_features(struct devlink *devlink, u64 features) +{ + WARN_ON(features & DEVLINK_F_RELOAD && + !devlink_reload_supported(devlink->ops)); + devlink->features = features; +} +EXPORT_SYMBOL_GPL(devlink_set_features); + +/** + * devl_register - Register devlink instance + * @devlink: devlink + */ +int devl_register(struct devlink *devlink) +{ + ASSERT_DEVLINK_NOT_REGISTERED(devlink); + devl_assert_locked(devlink); + + xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); + devlink_notify_register(devlink); + + return 0; +} +EXPORT_SYMBOL_GPL(devl_register); + +void devlink_register(struct devlink *devlink) +{ + devl_lock(devlink); + devl_register(devlink); + devl_unlock(devlink); +} +EXPORT_SYMBOL_GPL(devlink_register); + +/** + * devl_unregister - Unregister devlink instance + * @devlink: devlink + */ +void devl_unregister(struct devlink *devlink) +{ + ASSERT_DEVLINK_REGISTERED(devlink); + devl_assert_locked(devlink); + + devlink_notify_unregister(devlink); + xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); +} +EXPORT_SYMBOL_GPL(devl_unregister); + +void devlink_unregister(struct devlink *devlink) +{ + devl_lock(devlink); + devl_unregister(devlink); + devl_unlock(devlink); +} +EXPORT_SYMBOL_GPL(devlink_unregister); + +/** + * devlink_alloc_ns - Allocate new devlink instance resources + * in specific namespace + * + * @ops: ops + * @priv_size: size of user private data + * @net: net namespace + * @dev: parent device + * + * Allocate new devlink instance resources, including devlink index + * and name. + */ +struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, + size_t priv_size, struct net *net, + struct device *dev) +{ + struct devlink *devlink; + static u32 last_id; + int ret; + + WARN_ON(!ops || !dev); + if (!devlink_reload_actions_valid(ops)) + return NULL; + + devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL); + if (!devlink) + return NULL; + + ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b, + &last_id, GFP_KERNEL); + if (ret < 0) + goto err_xa_alloc; + + devlink->netdevice_nb.notifier_call = devlink_port_netdevice_event; + ret = register_netdevice_notifier_net(net, &devlink->netdevice_nb); + if (ret) + goto err_register_netdevice_notifier; + + devlink->dev = dev; + devlink->ops = ops; + xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC); + xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); + write_pnet(&devlink->_net, net); + INIT_LIST_HEAD(&devlink->rate_list); + INIT_LIST_HEAD(&devlink->linecard_list); + INIT_LIST_HEAD(&devlink->sb_list); + INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list); + INIT_LIST_HEAD(&devlink->resource_list); + INIT_LIST_HEAD(&devlink->param_list); + INIT_LIST_HEAD(&devlink->region_list); + INIT_LIST_HEAD(&devlink->reporter_list); + INIT_LIST_HEAD(&devlink->trap_list); + INIT_LIST_HEAD(&devlink->trap_group_list); + INIT_LIST_HEAD(&devlink->trap_policer_list); + INIT_RCU_WORK(&devlink->rwork, devlink_release); + lockdep_register_key(&devlink->lock_key); + mutex_init(&devlink->lock); + lockdep_set_class(&devlink->lock, &devlink->lock_key); + mutex_init(&devlink->reporters_lock); + mutex_init(&devlink->linecards_lock); + refcount_set(&devlink->refcount, 1); + + return devlink; + +err_register_netdevice_notifier: + xa_erase(&devlinks, devlink->index); +err_xa_alloc: + kfree(devlink); + return NULL; +} +EXPORT_SYMBOL_GPL(devlink_alloc_ns); + +/** + * devlink_free - Free devlink instance resources + * + * @devlink: devlink + */ +void devlink_free(struct devlink *devlink) +{ + ASSERT_DEVLINK_NOT_REGISTERED(devlink); + + mutex_destroy(&devlink->linecards_lock); + mutex_destroy(&devlink->reporters_lock); + WARN_ON(!list_empty(&devlink->trap_policer_list)); + WARN_ON(!list_empty(&devlink->trap_group_list)); + WARN_ON(!list_empty(&devlink->trap_list)); + WARN_ON(!list_empty(&devlink->reporter_list)); + WARN_ON(!list_empty(&devlink->region_list)); + WARN_ON(!list_empty(&devlink->param_list)); + WARN_ON(!list_empty(&devlink->resource_list)); + WARN_ON(!list_empty(&devlink->dpipe_table_list)); + WARN_ON(!list_empty(&devlink->sb_list)); + WARN_ON(!list_empty(&devlink->rate_list)); + WARN_ON(!list_empty(&devlink->linecard_list)); + WARN_ON(!xa_empty(&devlink->ports)); + + xa_destroy(&devlink->snapshot_ids); + xa_destroy(&devlink->ports); + + WARN_ON_ONCE(unregister_netdevice_notifier_net(devlink_net(devlink), + &devlink->netdevice_nb)); + + xa_erase(&devlinks, devlink->index); + + devlink_put(devlink); +} +EXPORT_SYMBOL_GPL(devlink_free); + +static void __net_exit devlink_pernet_pre_exit(struct net *net) +{ + struct devlink *devlink; + u32 actions_performed; + unsigned long index; + int err; + + /* In case network namespace is getting destroyed, reload + * all devlink instances from this namespace into init_net. + */ + devlinks_xa_for_each_registered_get(net, index, devlink) { + WARN_ON(!(devlink->features & DEVLINK_F_RELOAD)); + devl_lock(devlink); + err = 0; + if (devl_is_registered(devlink)) + err = devlink_reload(devlink, &init_net, + DEVLINK_RELOAD_ACTION_DRIVER_REINIT, + DEVLINK_RELOAD_LIMIT_UNSPEC, + &actions_performed, NULL); + devl_unlock(devlink); + devlink_put(devlink); + + if (err && err != -EOPNOTSUPP) + pr_warn("Failed to reload devlink instance into init_net\n"); + } +} + +static struct pernet_operations devlink_pernet_ops __net_initdata = { + .pre_exit = devlink_pernet_pre_exit, +}; + +static int __init devlink_init(void) +{ + int err; + + err = genl_register_family(&devlink_nl_family); + if (err) + goto out; + err = register_pernet_subsys(&devlink_pernet_ops); + +out: + WARN_ON(err); + return err; +} + +subsys_initcall(devlink_init); diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h new file mode 100644 index 000000000000..e724e4c2a4ff --- /dev/null +++ b/net/devlink/devl_internal.h @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> + */ + +#include <linux/mutex.h> +#include <linux/netdevice.h> +#include <linux/notifier.h> +#include <linux/types.h> +#include <linux/workqueue.h> +#include <linux/xarray.h> +#include <net/devlink.h> +#include <net/net_namespace.h> + +#define DEVLINK_REGISTERED XA_MARK_1 + +#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \ + (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX) + +struct devlink_dev_stats { + u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; + u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; +}; + +struct devlink { + u32 index; + struct xarray ports; + struct list_head rate_list; + struct list_head sb_list; + struct list_head dpipe_table_list; + struct list_head resource_list; + struct list_head param_list; + struct list_head region_list; + struct list_head reporter_list; + struct mutex reporters_lock; /* protects reporter_list */ + struct devlink_dpipe_headers *dpipe_headers; + struct list_head trap_list; + struct list_head trap_group_list; + struct list_head trap_policer_list; + struct list_head linecard_list; + struct mutex linecards_lock; /* protects linecard_list */ + const struct devlink_ops *ops; + u64 features; + struct xarray snapshot_ids; + struct devlink_dev_stats stats; + struct device *dev; + possible_net_t _net; + /* Serializes access to devlink instance specific objects such as + * port, sb, dpipe, resource, params, region, traps and more. + */ + struct mutex lock; + struct lock_class_key lock_key; + u8 reload_failed:1; + refcount_t refcount; + struct rcu_work rwork; + struct notifier_block netdevice_nb; + char priv[] __aligned(NETDEV_ALIGN); +}; + +extern struct xarray devlinks; +extern struct genl_family devlink_nl_family; + +/* devlink instances are open to the access from the user space after + * devlink_register() call. Such logical barrier allows us to have certain + * expectations related to locking. + * + * Before *_register() - we are in initialization stage and no parallel + * access possible to the devlink instance. All drivers perform that phase + * by implicitly holding device_lock. + * + * After *_register() - users and driver can access devlink instance at + * the same time. + */ +#define ASSERT_DEVLINK_REGISTERED(d) \ + WARN_ON_ONCE(!xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED)) +#define ASSERT_DEVLINK_NOT_REGISTERED(d) \ + WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED)) + +/* Iterate over devlink pointers which were possible to get reference to. + * devlink_put() needs to be called for each iterated devlink pointer + * in loop body in order to release the reference. + */ +#define devlinks_xa_for_each_registered_get(net, index, devlink) \ + for (index = 0; (devlink = devlinks_xa_find_get(net, &index)); index++) + +struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp); + +static inline bool devl_is_registered(struct devlink *devlink) +{ + /* To prevent races the caller must hold the instance lock + * or another lock taken during unregistration. + */ + return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); +} + +/* Netlink */ +#define DEVLINK_NL_FLAG_NEED_PORT BIT(0) +#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) +#define DEVLINK_NL_FLAG_NEED_RATE BIT(2) +#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3) +#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4) + +enum devlink_multicast_groups { + DEVLINK_MCGRP_CONFIG, +}; + +/* state held across netlink dumps */ +struct devlink_nl_dump_state { + unsigned long instance; + int idx; + union { + /* DEVLINK_CMD_REGION_READ */ + struct { + u64 start_offset; + }; + /* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET */ + struct { + u64 dump_ts; + }; + }; +}; + +struct devlink_gen_cmd { + int (*dump_one)(struct sk_buff *msg, struct devlink *devlink, + struct netlink_callback *cb); +}; + +/* Iterate over registered devlink instances for devlink dump. + * devlink_put() needs to be called for each iterated devlink pointer + * in loop body in order to release the reference. + * Note: this is NOT a generic iterator, it makes assumptions about the use + * of @state and can only be used once per dumpit implementation. + */ +#define devlink_dump_for_each_instance_get(msg, state, devlink) \ + for (; (devlink = devlinks_xa_find_get(sock_net(msg->sk), \ + &state->instance)); \ + state->instance++, state->idx = 0) + +extern const struct genl_small_ops devlink_nl_ops[56]; + +struct devlink * +devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs); + +void devlink_notify_unregister(struct devlink *devlink); +void devlink_notify_register(struct devlink *devlink); + +int devlink_nl_instance_iter_dump(struct sk_buff *msg, + struct netlink_callback *cb); + +static inline struct devlink_nl_dump_state * +devlink_dump_state(struct netlink_callback *cb) +{ + NL_ASSET_DUMP_CTX_FITS(struct devlink_nl_dump_state); + + return (struct devlink_nl_dump_state *)cb->ctx; +} + +/* gen cmds */ +extern const struct devlink_gen_cmd devl_gen_inst; +extern const struct devlink_gen_cmd devl_gen_port; +extern const struct devlink_gen_cmd devl_gen_sb; +extern const struct devlink_gen_cmd devl_gen_sb_pool; +extern const struct devlink_gen_cmd devl_gen_sb_port_pool; +extern const struct devlink_gen_cmd devl_gen_sb_tc_pool_bind; +extern const struct devlink_gen_cmd devl_gen_selftests; +extern const struct devlink_gen_cmd devl_gen_param; +extern const struct devlink_gen_cmd devl_gen_region; +extern const struct devlink_gen_cmd devl_gen_info; +extern const struct devlink_gen_cmd devl_gen_trap; +extern const struct devlink_gen_cmd devl_gen_trap_group; +extern const struct devlink_gen_cmd devl_gen_trap_policer; + +/* Ports */ +int devlink_port_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr); + +struct devlink_port * +devlink_port_get_from_info(struct devlink *devlink, struct genl_info *info); + +/* Reload */ +bool devlink_reload_actions_valid(const struct devlink_ops *ops); +int devlink_reload(struct devlink *devlink, struct net *dest_net, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + u32 *actions_performed, struct netlink_ext_ack *extack); + +static inline bool devlink_reload_supported(const struct devlink_ops *ops) +{ + return ops->reload_down && ops->reload_up; +} + +/* Line cards */ +struct devlink_linecard; + +struct devlink_linecard * +devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info); +void devlink_linecard_put(struct devlink_linecard *linecard); + +/* Rates */ +extern const struct devlink_gen_cmd devl_gen_rate_get; + +struct devlink_rate * +devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info); +struct devlink_rate * +devlink_rate_node_get_from_info(struct devlink *devlink, + struct genl_info *info); diff --git a/net/core/devlink.c b/net/devlink/leftover.c index 032d6d0a5ce6..1e23b2da78cc 100644 --- a/net/core/devlink.c +++ b/net/devlink/leftover.c @@ -31,52 +31,7 @@ #define CREATE_TRACE_POINTS #include <trace/events/devlink.h> -#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \ - (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX) - -struct devlink_dev_stats { - u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; - u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; -}; - -struct devlink { - u32 index; - struct xarray ports; - struct list_head rate_list; - struct list_head sb_list; - struct list_head dpipe_table_list; - struct list_head resource_list; - struct list_head param_list; - struct list_head region_list; - struct list_head reporter_list; - struct mutex reporters_lock; /* protects reporter_list */ - struct devlink_dpipe_headers *dpipe_headers; - struct list_head trap_list; - struct list_head trap_group_list; - struct list_head trap_policer_list; - struct list_head linecard_list; - struct mutex linecards_lock; /* protects linecard_list */ - const struct devlink_ops *ops; - u64 features; - struct xarray snapshot_ids; - struct devlink_dev_stats stats; - struct device *dev; - possible_net_t _net; - /* Serializes access to devlink instance specific objects such as - * port, sb, dpipe, resource, params, region, traps and more. - */ - struct mutex lock; - struct lock_class_key lock_key; - u8 reload_failed:1; - refcount_t refcount; - struct completion comp; - struct rcu_head rcu; - struct notifier_block netdevice_nb; - char priv[] __aligned(NETDEV_ALIGN); -}; - -struct devlink_linecard_ops; -struct devlink_linecard_type; +#include "devl_internal.h" struct devlink_linecard { struct list_head list; @@ -122,24 +77,6 @@ struct devlink_resource { void *occ_get_priv; }; -void *devlink_priv(struct devlink *devlink) -{ - return &devlink->priv; -} -EXPORT_SYMBOL_GPL(devlink_priv); - -struct devlink *priv_to_devlink(void *priv) -{ - return container_of(priv, struct devlink, priv); -} -EXPORT_SYMBOL_GPL(priv_to_devlink); - -struct device *devlink_to_dev(const struct devlink *devlink) -{ - return devlink->dev; -} -EXPORT_SYMBOL_GPL(devlink_to_dev); - static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = { { .name = "destination mac", @@ -211,172 +148,6 @@ static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG }, }; -static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); -#define DEVLINK_REGISTERED XA_MARK_1 -#define DEVLINK_UNREGISTERING XA_MARK_2 - -/* devlink instances are open to the access from the user space after - * devlink_register() call. Such logical barrier allows us to have certain - * expectations related to locking. - * - * Before *_register() - we are in initialization stage and no parallel - * access possible to the devlink instance. All drivers perform that phase - * by implicitly holding device_lock. - * - * After *_register() - users and driver can access devlink instance at - * the same time. - */ -#define ASSERT_DEVLINK_REGISTERED(d) \ - WARN_ON_ONCE(!xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED)) -#define ASSERT_DEVLINK_NOT_REGISTERED(d) \ - WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED)) - -struct net *devlink_net(const struct devlink *devlink) -{ - return read_pnet(&devlink->_net); -} -EXPORT_SYMBOL_GPL(devlink_net); - -static void __devlink_put_rcu(struct rcu_head *head) -{ - struct devlink *devlink = container_of(head, struct devlink, rcu); - - complete(&devlink->comp); -} - -void devlink_put(struct devlink *devlink) -{ - if (refcount_dec_and_test(&devlink->refcount)) - /* Make sure unregister operation that may await the completion - * is unblocked only after all users are after the end of - * RCU grace period. - */ - call_rcu(&devlink->rcu, __devlink_put_rcu); -} - -struct devlink *__must_check devlink_try_get(struct devlink *devlink) -{ - if (refcount_inc_not_zero(&devlink->refcount)) - return devlink; - return NULL; -} - -void devl_assert_locked(struct devlink *devlink) -{ - lockdep_assert_held(&devlink->lock); -} -EXPORT_SYMBOL_GPL(devl_assert_locked); - -#ifdef CONFIG_LOCKDEP -/* For use in conjunction with LOCKDEP only e.g. rcu_dereference_protected() */ -bool devl_lock_is_held(struct devlink *devlink) -{ - return lockdep_is_held(&devlink->lock); -} -EXPORT_SYMBOL_GPL(devl_lock_is_held); -#endif - -void devl_lock(struct devlink *devlink) -{ - mutex_lock(&devlink->lock); -} -EXPORT_SYMBOL_GPL(devl_lock); - -int devl_trylock(struct devlink *devlink) -{ - return mutex_trylock(&devlink->lock); -} -EXPORT_SYMBOL_GPL(devl_trylock); - -void devl_unlock(struct devlink *devlink) -{ - mutex_unlock(&devlink->lock); -} -EXPORT_SYMBOL_GPL(devl_unlock); - -static struct devlink * -devlinks_xa_find_get(struct net *net, unsigned long *indexp, xa_mark_t filter, - void * (*xa_find_fn)(struct xarray *, unsigned long *, - unsigned long, xa_mark_t)) -{ - struct devlink *devlink; - - rcu_read_lock(); -retry: - devlink = xa_find_fn(&devlinks, indexp, ULONG_MAX, DEVLINK_REGISTERED); - if (!devlink) - goto unlock; - - /* In case devlink_unregister() was already called and "unregistering" - * mark was set, do not allow to get a devlink reference here. - * This prevents live-lock of devlink_unregister() wait for completion. - */ - if (xa_get_mark(&devlinks, *indexp, DEVLINK_UNREGISTERING)) - goto retry; - - /* For a possible retry, the xa_find_after() should be always used */ - xa_find_fn = xa_find_after; - if (!devlink_try_get(devlink)) - goto retry; - if (!net_eq(devlink_net(devlink), net)) { - devlink_put(devlink); - goto retry; - } -unlock: - rcu_read_unlock(); - return devlink; -} - -static struct devlink *devlinks_xa_find_get_first(struct net *net, - unsigned long *indexp, - xa_mark_t filter) -{ - return devlinks_xa_find_get(net, indexp, filter, xa_find); -} - -static struct devlink *devlinks_xa_find_get_next(struct net *net, - unsigned long *indexp, - xa_mark_t filter) -{ - return devlinks_xa_find_get(net, indexp, filter, xa_find_after); -} - -/* Iterate over devlink pointers which were possible to get reference to. - * devlink_put() needs to be called for each iterated devlink pointer - * in loop body in order to release the reference. - */ -#define devlinks_xa_for_each_get(net, index, devlink, filter) \ - for (index = 0, \ - devlink = devlinks_xa_find_get_first(net, &index, filter); \ - devlink; devlink = devlinks_xa_find_get_next(net, &index, filter)) - -#define devlinks_xa_for_each_registered_get(net, index, devlink) \ - devlinks_xa_for_each_get(net, index, devlink, DEVLINK_REGISTERED) - -static struct devlink *devlink_get_from_attrs(struct net *net, - struct nlattr **attrs) -{ - struct devlink *devlink; - unsigned long index; - char *busname; - char *devname; - - if (!attrs[DEVLINK_ATTR_BUS_NAME] || !attrs[DEVLINK_ATTR_DEV_NAME]) - return ERR_PTR(-EINVAL); - - busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]); - devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); - - devlinks_xa_for_each_registered_get(net, index, devlink) { - if (strcmp(devlink->dev->bus->name, busname) == 0 && - strcmp(dev_name(devlink->dev), devname) == 0) - return devlink; - devlink_put(devlink); - } - - return ERR_PTR(-ENODEV); -} - #define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \ WARN_ON_ONCE(!(devlink_port)->registered) #define ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port) \ @@ -405,8 +176,8 @@ static struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink, return ERR_PTR(-EINVAL); } -static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink, - struct genl_info *info) +struct devlink_port *devlink_port_get_from_info(struct devlink *devlink, + struct genl_info *info) { return devlink_port_get_from_attrs(devlink, info->attrs); } @@ -466,13 +237,13 @@ devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs) return devlink_rate_node_get_by_name(devlink, rate_node_name); } -static struct devlink_rate * +struct devlink_rate * devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info) { return devlink_rate_node_get_from_attrs(devlink, info->attrs); } -static struct devlink_rate * +struct devlink_rate * devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info) { struct nlattr **attrs = info->attrs; @@ -523,13 +294,13 @@ devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs) return ERR_PTR(-EINVAL); } -static struct devlink_linecard * +struct devlink_linecard * devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info) { return devlink_linecard_get_from_attrs(devlink, info->attrs); } -static void devlink_linecard_put(struct devlink_linecard *linecard) +void devlink_linecard_put(struct devlink_linecard *linecard) { if (refcount_dec_and_test(&linecard->refcount)) { mutex_destroy(&linecard->state_lock); @@ -838,95 +609,6 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) return NULL; } -#define DEVLINK_NL_FLAG_NEED_PORT BIT(0) -#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) -#define DEVLINK_NL_FLAG_NEED_RATE BIT(2) -#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3) -#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4) - -static int devlink_nl_pre_doit(const struct genl_split_ops *ops, - struct sk_buff *skb, struct genl_info *info) -{ - struct devlink_linecard *linecard; - struct devlink_port *devlink_port; - struct devlink *devlink; - int err; - - devlink = devlink_get_from_attrs(genl_info_net(info), info->attrs); - if (IS_ERR(devlink)) - return PTR_ERR(devlink); - devl_lock(devlink); - info->user_ptr[0] = devlink; - if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { - devlink_port = devlink_port_get_from_info(devlink, info); - if (IS_ERR(devlink_port)) { - err = PTR_ERR(devlink_port); - goto unlock; - } - info->user_ptr[1] = devlink_port; - } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) { - devlink_port = devlink_port_get_from_info(devlink, info); - if (!IS_ERR(devlink_port)) - info->user_ptr[1] = devlink_port; - } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) { - struct devlink_rate *devlink_rate; - - devlink_rate = devlink_rate_get_from_info(devlink, info); - if (IS_ERR(devlink_rate)) { - err = PTR_ERR(devlink_rate); - goto unlock; - } - info->user_ptr[1] = devlink_rate; - } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) { - struct devlink_rate *rate_node; - - rate_node = devlink_rate_node_get_from_info(devlink, info); - if (IS_ERR(rate_node)) { - err = PTR_ERR(rate_node); - goto unlock; - } - info->user_ptr[1] = rate_node; - } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) { - linecard = devlink_linecard_get_from_info(devlink, info); - if (IS_ERR(linecard)) { - err = PTR_ERR(linecard); - goto unlock; - } - info->user_ptr[1] = linecard; - } - return 0; - -unlock: - devl_unlock(devlink); - devlink_put(devlink); - return err; -} - -static void devlink_nl_post_doit(const struct genl_split_ops *ops, - struct sk_buff *skb, struct genl_info *info) -{ - struct devlink_linecard *linecard; - struct devlink *devlink; - - devlink = info->user_ptr[0]; - if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) { - linecard = info->user_ptr[1]; - devlink_linecard_put(linecard); - } - devl_unlock(devlink); - devlink_put(devlink); -} - -static struct genl_family devlink_nl_family; - -enum devlink_multicast_groups { - DEVLINK_MCGRP_CONFIG, -}; - -static const struct genl_multicast_group devlink_nl_mcgrps[] = { - [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME }, -}; - static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink) { if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name)) @@ -1537,47 +1219,40 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } -static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +static int +devlink_nl_cmd_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink, + struct netlink_callback *cb) { + struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_rate *devlink_rate; - struct devlink *devlink; - int start = cb->args[0]; - unsigned long index; int idx = 0; int err = 0; - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - devl_lock(devlink); - list_for_each_entry(devlink_rate, &devlink->rate_list, list) { - enum devlink_command cmd = DEVLINK_CMD_RATE_NEW; - u32 id = NETLINK_CB(cb->skb).portid; + list_for_each_entry(devlink_rate, &devlink->rate_list, list) { + enum devlink_command cmd = DEVLINK_CMD_RATE_NEW; + u32 id = NETLINK_CB(cb->skb).portid; - if (idx < start) { - idx++; - continue; - } - err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id, - cb->nlh->nlmsg_seq, - NLM_F_MULTI, NULL); - if (err) { - devl_unlock(devlink); - devlink_put(devlink); - goto out; - } + if (idx < state->idx) { idx++; + continue; } - devl_unlock(devlink); - devlink_put(devlink); + err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, NULL); + if (err) { + state->idx = idx; + break; + } + idx++; } -out: - if (err != -EMSGSIZE) - return err; - cb->args[0] = idx; - return msg->len; + return err; } +const struct devlink_gen_cmd devl_gen_rate_get = { + .dump_one = devlink_nl_cmd_rate_get_dump_one, +}; + static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb, struct genl_info *info) { @@ -1632,38 +1307,19 @@ static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info) return genlmsg_reply(msg, info); } -static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +static int +devlink_nl_cmd_get_dump_one(struct sk_buff *msg, struct devlink *devlink, + struct netlink_callback *cb) { - struct devlink *devlink; - int start = cb->args[0]; - unsigned long index; - int idx = 0; - int err; - - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - if (idx < start) { - idx++; - devlink_put(devlink); - continue; - } - - devl_lock(devlink); - err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI); - devl_unlock(devlink); - devlink_put(devlink); - - if (err) - goto out; - idx++; - } -out: - cb->args[0] = idx; - return msg->len; + return devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI); } +const struct devlink_gen_cmd devl_gen_inst = { + .dump_one = devlink_nl_cmd_get_dump_one, +}; + static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb, struct genl_info *info) { @@ -1686,43 +1342,40 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb, return genlmsg_reply(msg, info); } -static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +static int +devlink_nl_cmd_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink, + struct netlink_callback *cb) { - struct devlink *devlink; + struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_port *devlink_port; - unsigned long index, port_index; - int start = cb->args[0]; + unsigned long port_index; int idx = 0; - int err; + int err = 0; - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - devl_lock(devlink); - xa_for_each(&devlink->ports, port_index, devlink_port) { - if (idx < start) { - idx++; - continue; - } - err = devlink_nl_port_fill(msg, devlink_port, - DEVLINK_CMD_NEW, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI, cb->extack); - if (err) { - devl_unlock(devlink); - devlink_put(devlink); - goto out; - } + xa_for_each(&devlink->ports, port_index, devlink_port) { + if (idx < state->idx) { idx++; + continue; } - devl_unlock(devlink); - devlink_put(devlink); + err = devlink_nl_port_fill(msg, devlink_port, + DEVLINK_CMD_NEW, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, cb->extack); + if (err) { + state->idx = idx; + break; + } + idx++; } -out: - cb->args[0] = idx; - return msg->len; + + return err; } +const struct devlink_gen_cmd devl_gen_port = { + .dump_one = devlink_nl_cmd_port_get_dump_one, +}; + static int devlink_port_type_set(struct devlink_port *devlink_port, enum devlink_port_type port_type) @@ -2468,17 +2121,20 @@ static int devlink_nl_cmd_linecard_get_doit(struct sk_buff *skb, static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { + struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_linecard *linecard; struct devlink *devlink; - int start = cb->args[0]; - unsigned long index; - int idx = 0; int err; - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + devlink_dump_for_each_instance_get(msg, state, devlink) { + int idx = 0; + mutex_lock(&devlink->linecards_lock); + if (!devl_is_registered(devlink)) + goto next_devlink; + list_for_each_entry(linecard, &devlink->linecard_list, list) { - if (idx < start) { + if (idx < state->idx) { idx++; continue; } @@ -2493,15 +2149,16 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg, if (err) { mutex_unlock(&devlink->linecards_lock); devlink_put(devlink); + state->idx = idx; goto out; } idx++; } +next_devlink: mutex_unlock(&devlink->linecards_lock); devlink_put(devlink); } out: - cb->args[0] = idx; return msg->len; } @@ -2727,43 +2384,39 @@ static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb, return genlmsg_reply(msg, info); } -static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +static int +devlink_nl_cmd_sb_get_dump_one(struct sk_buff *msg, struct devlink *devlink, + struct netlink_callback *cb) { - struct devlink *devlink; + struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_sb *devlink_sb; - int start = cb->args[0]; - unsigned long index; int idx = 0; - int err; + int err = 0; - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - devl_lock(devlink); - list_for_each_entry(devlink_sb, &devlink->sb_list, list) { - if (idx < start) { - idx++; - continue; - } - err = devlink_nl_sb_fill(msg, devlink, devlink_sb, - DEVLINK_CMD_SB_NEW, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI); - if (err) { - devl_unlock(devlink); - devlink_put(devlink); - goto out; - } + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + if (idx < state->idx) { idx++; + continue; } - devl_unlock(devlink); - devlink_put(devlink); + err = devlink_nl_sb_fill(msg, devlink, devlink_sb, + DEVLINK_CMD_SB_NEW, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + state->idx = idx; + break; + } + idx++; } -out: - cb->args[0] = idx; - return msg->len; + + return err; } +const struct devlink_gen_cmd devl_gen_sb = { + .dump_one = devlink_nl_cmd_sb_get_dump_one, +}; + static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, u16 pool_index, enum devlink_command cmd, @@ -2869,46 +2522,39 @@ static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx, return 0; } -static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +static int +devlink_nl_cmd_sb_pool_get_dump_one(struct sk_buff *msg, + struct devlink *devlink, + struct netlink_callback *cb) { - struct devlink *devlink; + struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_sb *devlink_sb; - int start = cb->args[0]; - unsigned long index; - int idx = 0; int err = 0; + int idx = 0; - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - if (!devlink->ops->sb_pool_get) - goto retry; + if (!devlink->ops->sb_pool_get) + return 0; - devl_lock(devlink); - list_for_each_entry(devlink_sb, &devlink->sb_list, list) { - err = __sb_pool_get_dumpit(msg, start, &idx, devlink, - devlink_sb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq); - if (err == -EOPNOTSUPP) { - err = 0; - } else if (err) { - devl_unlock(devlink); - devlink_put(devlink); - goto out; - } + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + err = __sb_pool_get_dumpit(msg, state->idx, &idx, + devlink, devlink_sb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { + state->idx = idx; + break; } - devl_unlock(devlink); -retry: - devlink_put(devlink); } -out: - if (err != -EMSGSIZE) - return err; - cb->args[0] = idx; - return msg->len; + return err; } +const struct devlink_gen_cmd devl_gen_sb_pool = { + .dump_one = devlink_nl_cmd_sb_pool_get_dump_one, +}; + static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index, u16 pool_index, u32 size, enum devlink_sb_threshold_type threshold_type, @@ -3084,46 +2730,39 @@ static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx, return 0; } -static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +static int +devlink_nl_cmd_sb_port_pool_get_dump_one(struct sk_buff *msg, + struct devlink *devlink, + struct netlink_callback *cb) { - struct devlink *devlink; + struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_sb *devlink_sb; - int start = cb->args[0]; - unsigned long index; int idx = 0; int err = 0; - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - if (!devlink->ops->sb_port_pool_get) - goto retry; + if (!devlink->ops->sb_port_pool_get) + return 0; - devl_lock(devlink); - list_for_each_entry(devlink_sb, &devlink->sb_list, list) { - err = __sb_port_pool_get_dumpit(msg, start, &idx, - devlink, devlink_sb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq); - if (err == -EOPNOTSUPP) { - err = 0; - } else if (err) { - devl_unlock(devlink); - devlink_put(devlink); - goto out; - } + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + err = __sb_port_pool_get_dumpit(msg, state->idx, &idx, + devlink, devlink_sb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { + state->idx = idx; + break; } - devl_unlock(devlink); -retry: - devlink_put(devlink); } -out: - if (err != -EMSGSIZE) - return err; - cb->args[0] = idx; - return msg->len; + return err; } +const struct devlink_gen_cmd devl_gen_sb_port_pool = { + .dump_one = devlink_nl_cmd_sb_port_pool_get_dump_one, +}; + static int devlink_sb_port_pool_set(struct devlink_port *devlink_port, unsigned int sb_index, u16 pool_index, u32 threshold, @@ -3327,47 +2966,38 @@ static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, } static int -devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +devlink_nl_cmd_sb_tc_pool_bind_get_dump_one(struct sk_buff *msg, + struct devlink *devlink, + struct netlink_callback *cb) { - struct devlink *devlink; + struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_sb *devlink_sb; - int start = cb->args[0]; - unsigned long index; int idx = 0; int err = 0; - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - if (!devlink->ops->sb_tc_pool_bind_get) - goto retry; + if (!devlink->ops->sb_tc_pool_bind_get) + return 0; - devl_lock(devlink); - list_for_each_entry(devlink_sb, &devlink->sb_list, list) { - err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx, - devlink, - devlink_sb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq); - if (err == -EOPNOTSUPP) { - err = 0; - } else if (err) { - devl_unlock(devlink); - devlink_put(devlink); - goto out; - } + list_for_each_entry(devlink_sb, &devlink->sb_list, list) { + err = __sb_tc_pool_bind_get_dumpit(msg, state->idx, &idx, + devlink, devlink_sb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { + state->idx = idx; + break; } - devl_unlock(devlink); -retry: - devlink_put(devlink); } -out: - if (err != -EMSGSIZE) - return err; - cb->args[0] = idx; - return msg->len; + return err; } +const struct devlink_gen_cmd devl_gen_sb_tc_pool_bind = { + .dump_one = devlink_nl_cmd_sb_tc_pool_bind_get_dump_one, +}; + static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, @@ -4653,11 +4283,6 @@ static void devlink_ns_change_notify(struct devlink *devlink, devlink_notify(devlink, DEVLINK_CMD_DEL); } -static bool devlink_reload_supported(const struct devlink_ops *ops) -{ - return ops->reload_down && ops->reload_up; -} - static void devlink_reload_failed_set(struct devlink *devlink, bool reload_failed) { @@ -4725,9 +4350,10 @@ void devlink_remote_reload_actions_performed(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devlink_remote_reload_actions_performed); -static int devlink_reload(struct devlink *devlink, struct net *dest_net, - enum devlink_reload_action action, enum devlink_reload_limit limit, - u32 *actions_performed, struct netlink_ext_ack *extack) +int devlink_reload(struct devlink *devlink, struct net *dest_net, + enum devlink_reload_action action, + enum devlink_reload_limit limit, + u32 *actions_performed, struct netlink_ext_ack *extack) { u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE]; struct net *curr_net; @@ -5190,41 +4816,24 @@ static int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb, return genlmsg_reply(msg, info); } -static int devlink_nl_cmd_selftests_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +static int +devlink_nl_cmd_selftests_get_dump_one(struct sk_buff *msg, + struct devlink *devlink, + struct netlink_callback *cb) { - struct devlink *devlink; - int start = cb->args[0]; - unsigned long index; - int idx = 0; - int err = 0; - - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - if (idx < start || !devlink->ops->selftest_check) - goto inc; - - devl_lock(devlink); - err = devlink_nl_selftests_fill(msg, devlink, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - cb->extack); - devl_unlock(devlink); - if (err) { - devlink_put(devlink); - break; - } -inc: - idx++; - devlink_put(devlink); - } - - if (err != -EMSGSIZE) - return err; + if (!devlink->ops->selftest_check) + return 0; - cb->args[0] = idx; - return msg->len; + return devlink_nl_selftests_fill(msg, devlink, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->extack); } +const struct devlink_gen_cmd devl_gen_selftests = { + .dump_one = devlink_nl_cmd_selftests_get_dump_one, +}; + static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id, enum devlink_selftest_status test_status) { @@ -5654,7 +5263,13 @@ static void devlink_param_notify(struct devlink *devlink, WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL && cmd != DEVLINK_CMD_PORT_PARAM_NEW && cmd != DEVLINK_CMD_PORT_PARAM_DEL); - ASSERT_DEVLINK_REGISTERED(devlink); + + /* devlink_notify_register() / devlink_notify_unregister() + * will replay the notifications if the params are added/removed + * outside of the lifetime of the instance. + */ + if (!devl_is_registered(devlink)) + return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -5670,48 +5285,41 @@ static void devlink_param_notify(struct devlink *devlink, msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } -static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +static int +devlink_nl_cmd_param_get_dump_one(struct sk_buff *msg, struct devlink *devlink, + struct netlink_callback *cb) { + struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_param_item *param_item; - struct devlink *devlink; - int start = cb->args[0]; - unsigned long index; int idx = 0; int err = 0; - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - devl_lock(devlink); - list_for_each_entry(param_item, &devlink->param_list, list) { - if (idx < start) { - idx++; - continue; - } - err = devlink_nl_param_fill(msg, devlink, 0, param_item, - DEVLINK_CMD_PARAM_GET, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI); - if (err == -EOPNOTSUPP) { - err = 0; - } else if (err) { - devl_unlock(devlink); - devlink_put(devlink); - goto out; - } + list_for_each_entry(param_item, &devlink->param_list, list) { + if (idx < state->idx) { idx++; + continue; } - devl_unlock(devlink); - devlink_put(devlink); + err = devlink_nl_param_fill(msg, devlink, 0, param_item, + DEVLINK_CMD_PARAM_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { + state->idx = idx; + break; + } + idx++; } -out: - if (err != -EMSGSIZE) - return err; - cb->args[0] = idx; - return msg->len; + return err; } +const struct devlink_gen_cmd devl_gen_param = { + .dump_one = devlink_nl_cmd_param_get_dump_one, +}; + static int devlink_param_type_get_from_info(struct genl_info *info, enum devlink_param_type *param_type) @@ -6375,21 +5983,20 @@ out: return err; } -static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg, - struct netlink_callback *cb, - struct devlink *devlink, - int *idx, - int start) +static int +devlink_nl_cmd_region_get_dump_one(struct sk_buff *msg, struct devlink *devlink, + struct netlink_callback *cb) { + struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_region *region; struct devlink_port *port; unsigned long port_index; - int err = 0; + int idx = 0; + int err; - devl_lock(devlink); list_for_each_entry(region, &devlink->region_list, list) { - if (*idx < start) { - (*idx)++; + if (idx < state->idx) { + idx++; continue; } err = devlink_nl_region_fill(msg, devlink, @@ -6397,43 +6004,28 @@ static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, region); - if (err) - goto out; - (*idx)++; + if (err) { + state->idx = idx; + return err; + } + idx++; } xa_for_each(&devlink->ports, port_index, port) { - err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, idx, - start); - if (err) - goto out; + err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, &idx, + state->idx); + if (err) { + state->idx = idx; + return err; + } } -out: - devl_unlock(devlink); - return err; + return 0; } -static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) -{ - struct devlink *devlink; - int start = cb->args[0]; - unsigned long index; - int idx = 0; - int err = 0; - - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink, - &idx, start); - devlink_put(devlink); - if (err) - goto out; - } -out: - cb->args[0] = idx; - return msg->len; -} +const struct devlink_gen_cmd devl_gen_region = { + .dump_one = devlink_nl_cmd_region_get_dump_one, +}; static int devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info) @@ -6716,6 +6308,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { const struct genl_dumpit_info *info = genl_dumpit_info(cb); + struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct nlattr *chunks_attr, *region_attr, *snapshot_attr; u64 ret_offset, start_offset, end_offset = U64_MAX; struct nlattr **attrs = info->attrs; @@ -6729,14 +6322,12 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, void *hdr; int err; - start_offset = *((u64 *)&cb->args[0]); + start_offset = state->start_offset; - devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); + devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs); if (IS_ERR(devlink)) return PTR_ERR(devlink); - devl_lock(devlink); - if (!attrs[DEVLINK_ATTR_REGION_NAME]) { NL_SET_ERR_MSG(cb->extack, "No region name provided"); err = -EINVAL; @@ -6868,7 +6459,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto nla_put_failure; } - *((u64 *)&cb->args[0]) = ret_offset; + state->start_offset = ret_offset; nla_nest_end(skb, chunks_attr); genlmsg_end(skb, hdr); @@ -7064,43 +6655,25 @@ static int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, return genlmsg_reply(msg, info); } -static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +static int +devlink_nl_cmd_info_get_dump_one(struct sk_buff *msg, struct devlink *devlink, + struct netlink_callback *cb) { - struct devlink *devlink; - int start = cb->args[0]; - unsigned long index; - int idx = 0; - int err = 0; - - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - if (idx < start) - goto inc; - - devl_lock(devlink); - err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - cb->extack); - devl_unlock(devlink); - if (err == -EOPNOTSUPP) - err = 0; - else if (err) { - devlink_put(devlink); - break; - } -inc: - idx++; - devlink_put(devlink); - } - - if (err != -EMSGSIZE) - return err; + int err; - cb->args[0] = idx; - return msg->len; + err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->extack); + if (err == -EOPNOTSUPP) + err = 0; + return err; } +const struct devlink_gen_cmd devl_gen_info = { + .dump_one = devlink_nl_cmd_info_get_dump_one, +}; + struct devlink_fmsg_item { struct list_head list; int attrtype; @@ -7666,7 +7239,8 @@ static int devlink_fmsg_dumpit(struct devlink_fmsg *fmsg, struct sk_buff *skb, struct netlink_callback *cb, enum devlink_command cmd) { - int index = cb->args[0]; + struct devlink_nl_dump_state *state = devlink_dump_state(cb); + int index = state->idx; int tmp_index = index; void *hdr; int err; @@ -7682,7 +7256,7 @@ static int devlink_fmsg_dumpit(struct devlink_fmsg *fmsg, struct sk_buff *skb, if ((err && err != -EMSGSIZE) || tmp_index == index) goto nla_put_failure; - cb->args[0] = index; + state->idx = index; genlmsg_end(skb, hdr); return skb->len; @@ -8169,9 +7743,10 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb) struct nlattr **attrs = info->attrs; struct devlink *devlink; - devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); + devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs); if (IS_ERR(devlink)) return NULL; + devl_unlock(devlink); reporter = devlink_health_reporter_get_from_attrs(devlink, attrs); devlink_put(devlink); @@ -8233,19 +7808,26 @@ static int devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { - struct devlink_health_reporter *reporter; - unsigned long index, port_index; - struct devlink_port *port; + struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink *devlink; - int start = cb->args[0]; - int idx = 0; int err; - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { + devlink_dump_for_each_instance_get(msg, state, devlink) { + struct devlink_health_reporter *reporter; + struct devlink_port *port; + unsigned long port_index; + int idx = 0; + mutex_lock(&devlink->reporters_lock); + if (!devl_is_registered(devlink)) { + mutex_unlock(&devlink->reporters_lock); + devlink_put(devlink); + continue; + } + list_for_each_entry(reporter, &devlink->reporter_list, list) { - if (idx < start) { + if (idx < state->idx) { idx++; continue; } @@ -8256,20 +7838,21 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, if (err) { mutex_unlock(&devlink->reporters_lock); devlink_put(devlink); + state->idx = idx; goto out; } idx++; } mutex_unlock(&devlink->reporters_lock); - devlink_put(devlink); - } - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { devl_lock(devlink); + if (!devl_is_registered(devlink)) + goto next_devlink; + xa_for_each(&devlink->ports, port_index, port) { mutex_lock(&port->reporters_lock); list_for_each_entry(reporter, &port->reporter_list, list) { - if (idx < start) { + if (idx < state->idx) { idx++; continue; } @@ -8282,17 +7865,18 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, mutex_unlock(&port->reporters_lock); devl_unlock(devlink); devlink_put(devlink); + state->idx = idx; goto out; } idx++; } mutex_unlock(&port->reporters_lock); } +next_devlink: devl_unlock(devlink); devlink_put(devlink); } out: - cb->args[0] = idx; return msg->len; } @@ -8404,8 +7988,8 @@ static int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { + struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_health_reporter *reporter; - u64 start = cb->args[0]; int err; reporter = devlink_health_reporter_get_from_cb(cb); @@ -8417,13 +8001,13 @@ devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb, goto out; } mutex_lock(&reporter->dump_lock); - if (!start) { + if (!state->idx) { err = devlink_health_do_dump(reporter, NULL, cb->extack); if (err) goto unlock; - cb->args[1] = reporter->dump_ts; + state->dump_ts = reporter->dump_ts; } - if (!reporter->dump_fmsg || cb->args[1] != reporter->dump_ts) { + if (!reporter->dump_fmsg || state->dump_ts != reporter->dump_ts) { NL_SET_ERR_MSG_MOD(cb->extack, "Dump trampled, please retry"); err = -EAGAIN; goto unlock; @@ -8814,43 +8398,39 @@ err_trap_fill: return err; } -static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +static int +devlink_nl_cmd_trap_get_dump_one(struct sk_buff *msg, struct devlink *devlink, + struct netlink_callback *cb) { + struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_trap_item *trap_item; - struct devlink *devlink; - int start = cb->args[0]; - unsigned long index; int idx = 0; - int err; + int err = 0; - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - devl_lock(devlink); - list_for_each_entry(trap_item, &devlink->trap_list, list) { - if (idx < start) { - idx++; - continue; - } - err = devlink_nl_trap_fill(msg, devlink, trap_item, - DEVLINK_CMD_TRAP_NEW, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI); - if (err) { - devl_unlock(devlink); - devlink_put(devlink); - goto out; - } + list_for_each_entry(trap_item, &devlink->trap_list, list) { + if (idx < state->idx) { idx++; + continue; } - devl_unlock(devlink); - devlink_put(devlink); + err = devlink_nl_trap_fill(msg, devlink, trap_item, + DEVLINK_CMD_TRAP_NEW, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + state->idx = idx; + break; + } + idx++; } -out: - cb->args[0] = idx; - return msg->len; + + return err; } +const struct devlink_gen_cmd devl_gen_trap = { + .dump_one = devlink_nl_cmd_trap_get_dump_one, +}; + static int __devlink_trap_action_set(struct devlink *devlink, struct devlink_trap_item *trap_item, enum devlink_trap_action trap_action, @@ -9029,46 +8609,41 @@ err_trap_group_fill: return err; } -static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +static int +devlink_nl_cmd_trap_group_get_dump_one(struct sk_buff *msg, + struct devlink *devlink, + struct netlink_callback *cb) { - enum devlink_command cmd = DEVLINK_CMD_TRAP_GROUP_NEW; + struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_trap_group_item *group_item; - u32 portid = NETLINK_CB(cb->skb).portid; - struct devlink *devlink; - int start = cb->args[0]; - unsigned long index; int idx = 0; - int err; + int err = 0; - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - devl_lock(devlink); - list_for_each_entry(group_item, &devlink->trap_group_list, - list) { - if (idx < start) { - idx++; - continue; - } - err = devlink_nl_trap_group_fill(msg, devlink, - group_item, cmd, - portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI); - if (err) { - devl_unlock(devlink); - devlink_put(devlink); - goto out; - } + + list_for_each_entry(group_item, &devlink->trap_group_list, list) { + if (idx < state->idx) { idx++; + continue; } - devl_unlock(devlink); - devlink_put(devlink); + err = devlink_nl_trap_group_fill(msg, devlink, group_item, + DEVLINK_CMD_TRAP_GROUP_NEW, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + state->idx = idx; + break; + } + idx++; } -out: - cb->args[0] = idx; - return msg->len; + + return err; } +const struct devlink_gen_cmd devl_gen_trap_group = { + .dump_one = devlink_nl_cmd_trap_group_get_dump_one, +}; + static int __devlink_trap_group_action_set(struct devlink *devlink, struct devlink_trap_group_item *group_item, @@ -9333,46 +8908,40 @@ err_trap_policer_fill: return err; } -static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) +static int +devlink_nl_cmd_trap_policer_get_dump_one(struct sk_buff *msg, + struct devlink *devlink, + struct netlink_callback *cb) { - enum devlink_command cmd = DEVLINK_CMD_TRAP_POLICER_NEW; + struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_trap_policer_item *policer_item; - u32 portid = NETLINK_CB(cb->skb).portid; - struct devlink *devlink; - int start = cb->args[0]; - unsigned long index; int idx = 0; - int err; + int err = 0; - devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - devl_lock(devlink); - list_for_each_entry(policer_item, &devlink->trap_policer_list, - list) { - if (idx < start) { - idx++; - continue; - } - err = devlink_nl_trap_policer_fill(msg, devlink, - policer_item, cmd, - portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI); - if (err) { - devl_unlock(devlink); - devlink_put(devlink); - goto out; - } + list_for_each_entry(policer_item, &devlink->trap_policer_list, list) { + if (idx < state->idx) { idx++; + continue; } - devl_unlock(devlink); - devlink_put(devlink); + err = devlink_nl_trap_policer_fill(msg, devlink, policer_item, + DEVLINK_CMD_TRAP_POLICER_NEW, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + state->idx = idx; + break; + } + idx++; } -out: - cb->args[0] = idx; - return msg->len; + + return err; } +const struct devlink_gen_cmd devl_gen_trap_policer = { + .dump_one = devlink_nl_cmd_trap_policer_get_dump_one, +}; + static int devlink_trap_policer_set(struct devlink *devlink, struct devlink_trap_policer_item *policer_item, @@ -9445,88 +9014,19 @@ static int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb, return devlink_trap_policer_set(devlink, policer_item, info); } -static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_UNSPEC] = { .strict_start_type = - DEVLINK_ATTR_TRAP_POLICER_ID }, - [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 }, - [DEVLINK_ATTR_PORT_TYPE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_PORT_TYPE_AUTO, - DEVLINK_PORT_TYPE_IB), - [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 }, - [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 }, - [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 }, - [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 }, - [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 }, - [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, - [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_ESWITCH_MODE_LEGACY, - DEVLINK_ESWITCH_MODE_SWITCHDEV), - [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 }, - [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64}, - [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64}, - [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, - [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 }, - [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 }, - [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 }, - [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, - [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] = - NLA_POLICY_BITFIELD32(DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS), - [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 }, - [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 }, - [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 }, - [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 }, - [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8 }, - [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32 }, - [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 }, - [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 }, - [DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED }, - [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, - DEVLINK_RELOAD_ACTION_MAX), - [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(DEVLINK_RELOAD_LIMITS_VALID_MASK), - [DEVLINK_ATTR_PORT_FLAVOUR] = { .type = NLA_U16 }, - [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 }, - [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 }, - [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 }, - [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 }, - [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 }, - [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 }, - [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 }, - [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED }, - [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 }, - [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 }, - [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG }, -}; - -static const struct genl_small_ops devlink_nl_ops[] = { +const struct genl_small_ops devlink_nl_ops[56] = { { .cmd = DEVLINK_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_get_doit, - .dumpit = devlink_nl_cmd_get_dumpit, + .dumpit = devlink_nl_instance_iter_dump, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_PORT_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_get_doit, - .dumpit = devlink_nl_cmd_port_get_dumpit, + .dumpit = devlink_nl_instance_iter_dump, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, /* can be retrieved by unprivileged users */ }, @@ -9540,7 +9040,7 @@ static const struct genl_small_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_RATE_GET, .doit = devlink_nl_cmd_rate_get_doit, - .dumpit = devlink_nl_cmd_rate_get_dumpit, + .dumpit = devlink_nl_instance_iter_dump, .internal_flags = DEVLINK_NL_FLAG_NEED_RATE, /* can be retrieved by unprivileged users */ }, @@ -9602,14 +9102,14 @@ static const struct genl_small_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_SB_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_get_doit, - .dumpit = devlink_nl_cmd_sb_get_dumpit, + .dumpit = devlink_nl_instance_iter_dump, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_SB_POOL_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_pool_get_doit, - .dumpit = devlink_nl_cmd_sb_pool_get_dumpit, + .dumpit = devlink_nl_instance_iter_dump, /* can be retrieved by unprivileged users */ }, { @@ -9622,7 +9122,7 @@ static const struct genl_small_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_SB_PORT_POOL_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_port_pool_get_doit, - .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit, + .dumpit = devlink_nl_instance_iter_dump, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, /* can be retrieved by unprivileged users */ }, @@ -9637,7 +9137,7 @@ static const struct genl_small_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit, - .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit, + .dumpit = devlink_nl_instance_iter_dump, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, /* can be retrieved by unprivileged users */ }, @@ -9718,7 +9218,7 @@ static const struct genl_small_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_PARAM_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_param_get_doit, - .dumpit = devlink_nl_cmd_param_get_dumpit, + .dumpit = devlink_nl_instance_iter_dump, /* can be retrieved by unprivileged users */ }, { @@ -9746,7 +9246,7 @@ static const struct genl_small_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_REGION_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_region_get_doit, - .dumpit = devlink_nl_cmd_region_get_dumpit, + .dumpit = devlink_nl_instance_iter_dump, .flags = GENL_ADMIN_PERM, }, { @@ -9772,7 +9272,7 @@ static const struct genl_small_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_INFO_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_info_get_doit, - .dumpit = devlink_nl_cmd_info_get_dumpit, + .dumpit = devlink_nl_instance_iter_dump, /* can be retrieved by unprivileged users */ }, { @@ -9834,7 +9334,7 @@ static const struct genl_small_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_TRAP_GET, .doit = devlink_nl_cmd_trap_get_doit, - .dumpit = devlink_nl_cmd_trap_get_dumpit, + .dumpit = devlink_nl_instance_iter_dump, /* can be retrieved by unprivileged users */ }, { @@ -9845,7 +9345,7 @@ static const struct genl_small_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_TRAP_GROUP_GET, .doit = devlink_nl_cmd_trap_group_get_doit, - .dumpit = devlink_nl_cmd_trap_group_get_dumpit, + .dumpit = devlink_nl_instance_iter_dump, /* can be retrieved by unprivileged users */ }, { @@ -9856,7 +9356,7 @@ static const struct genl_small_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_TRAP_POLICER_GET, .doit = devlink_nl_cmd_trap_policer_get_doit, - .dumpit = devlink_nl_cmd_trap_policer_get_dumpit, + .dumpit = devlink_nl_instance_iter_dump, /* can be retrieved by unprivileged users */ }, { @@ -9867,7 +9367,7 @@ static const struct genl_small_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_SELFTESTS_GET, .doit = devlink_nl_cmd_selftests_get_doit, - .dumpit = devlink_nl_cmd_selftests_get_dumpit + .dumpit = devlink_nl_instance_iter_dump, /* can be retrieved by unprivileged users */ }, { @@ -9875,26 +9375,10 @@ static const struct genl_small_ops devlink_nl_ops[] = { .doit = devlink_nl_cmd_selftests_run, .flags = GENL_ADMIN_PERM, }, + /* -- No new ops here! Use split ops going forward! -- */ }; -static struct genl_family devlink_nl_family __ro_after_init = { - .name = DEVLINK_GENL_NAME, - .version = DEVLINK_GENL_VERSION, - .maxattr = DEVLINK_ATTR_MAX, - .policy = devlink_nl_policy, - .netnsok = true, - .parallel_ops = true, - .pre_doit = devlink_nl_pre_doit, - .post_doit = devlink_nl_post_doit, - .module = THIS_MODULE, - .small_ops = devlink_nl_ops, - .n_small_ops = ARRAY_SIZE(devlink_nl_ops), - .resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1, - .mcgrps = devlink_nl_mcgrps, - .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps), -}; - -static bool devlink_reload_actions_valid(const struct devlink_ops *ops) +bool devlink_reload_actions_valid(const struct devlink_ops *ops) { const struct devlink_reload_combination *comb; int i; @@ -9923,100 +9407,6 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops) return true; } -/** - * devlink_set_features - Set devlink supported features - * - * @devlink: devlink - * @features: devlink support features - * - * This interface allows us to set reload ops separatelly from - * the devlink_alloc. - */ -void devlink_set_features(struct devlink *devlink, u64 features) -{ - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - - WARN_ON(features & DEVLINK_F_RELOAD && - !devlink_reload_supported(devlink->ops)); - devlink->features = features; -} -EXPORT_SYMBOL_GPL(devlink_set_features); - -static int devlink_netdevice_event(struct notifier_block *nb, - unsigned long event, void *ptr); - -/** - * devlink_alloc_ns - Allocate new devlink instance resources - * in specific namespace - * - * @ops: ops - * @priv_size: size of user private data - * @net: net namespace - * @dev: parent device - * - * Allocate new devlink instance resources, including devlink index - * and name. - */ -struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, - size_t priv_size, struct net *net, - struct device *dev) -{ - struct devlink *devlink; - static u32 last_id; - int ret; - - WARN_ON(!ops || !dev); - if (!devlink_reload_actions_valid(ops)) - return NULL; - - devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL); - if (!devlink) - return NULL; - - ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b, - &last_id, GFP_KERNEL); - if (ret < 0) - goto err_xa_alloc; - - devlink->netdevice_nb.notifier_call = devlink_netdevice_event; - ret = register_netdevice_notifier_net(net, &devlink->netdevice_nb); - if (ret) - goto err_register_netdevice_notifier; - - devlink->dev = dev; - devlink->ops = ops; - xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC); - xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); - write_pnet(&devlink->_net, net); - INIT_LIST_HEAD(&devlink->rate_list); - INIT_LIST_HEAD(&devlink->linecard_list); - INIT_LIST_HEAD(&devlink->sb_list); - INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list); - INIT_LIST_HEAD(&devlink->resource_list); - INIT_LIST_HEAD(&devlink->param_list); - INIT_LIST_HEAD(&devlink->region_list); - INIT_LIST_HEAD(&devlink->reporter_list); - INIT_LIST_HEAD(&devlink->trap_list); - INIT_LIST_HEAD(&devlink->trap_group_list); - INIT_LIST_HEAD(&devlink->trap_policer_list); - lockdep_register_key(&devlink->lock_key); - mutex_init(&devlink->lock); - lockdep_set_class(&devlink->lock, &devlink->lock_key); - mutex_init(&devlink->reporters_lock); - mutex_init(&devlink->linecards_lock); - refcount_set(&devlink->refcount, 1); - init_completion(&devlink->comp); - - return devlink; - -err_register_netdevice_notifier: - xa_erase(&devlinks, devlink->index); -err_xa_alloc: - kfree(devlink); - return NULL; -} -EXPORT_SYMBOL_GPL(devlink_alloc_ns); - static void devlink_trap_policer_notify(struct devlink *devlink, const struct devlink_trap_policer_item *policer_item, @@ -10029,7 +9419,7 @@ static void devlink_trap_notify(struct devlink *devlink, const struct devlink_trap_item *trap_item, enum devlink_command cmd); -static void devlink_notify_register(struct devlink *devlink) +void devlink_notify_register(struct devlink *devlink) { struct devlink_trap_policer_item *policer_item; struct devlink_trap_group_item *group_item; @@ -10070,7 +9460,7 @@ static void devlink_notify_register(struct devlink *devlink) DEVLINK_CMD_PARAM_NEW); } -static void devlink_notify_unregister(struct devlink *devlink) +void devlink_notify_unregister(struct devlink *devlink) { struct devlink_trap_policer_item *policer_item; struct devlink_trap_group_item *group_item; @@ -10107,79 +9497,6 @@ static void devlink_notify_unregister(struct devlink *devlink) devlink_notify(devlink, DEVLINK_CMD_DEL); } -/** - * devlink_register - Register devlink instance - * - * @devlink: devlink - */ -void devlink_register(struct devlink *devlink) -{ - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - /* Make sure that we are in .probe() routine */ - - xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); - devlink_notify_register(devlink); -} -EXPORT_SYMBOL_GPL(devlink_register); - -/** - * devlink_unregister - Unregister devlink instance - * - * @devlink: devlink - */ -void devlink_unregister(struct devlink *devlink) -{ - ASSERT_DEVLINK_REGISTERED(devlink); - /* Make sure that we are in .remove() routine */ - - xa_set_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING); - devlink_put(devlink); - wait_for_completion(&devlink->comp); - - devlink_notify_unregister(devlink); - xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); - xa_clear_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING); -} -EXPORT_SYMBOL_GPL(devlink_unregister); - -/** - * devlink_free - Free devlink instance resources - * - * @devlink: devlink - */ -void devlink_free(struct devlink *devlink) -{ - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - - mutex_destroy(&devlink->linecards_lock); - mutex_destroy(&devlink->reporters_lock); - mutex_destroy(&devlink->lock); - lockdep_unregister_key(&devlink->lock_key); - WARN_ON(!list_empty(&devlink->trap_policer_list)); - WARN_ON(!list_empty(&devlink->trap_group_list)); - WARN_ON(!list_empty(&devlink->trap_list)); - WARN_ON(!list_empty(&devlink->reporter_list)); - WARN_ON(!list_empty(&devlink->region_list)); - WARN_ON(!list_empty(&devlink->param_list)); - WARN_ON(!list_empty(&devlink->resource_list)); - WARN_ON(!list_empty(&devlink->dpipe_table_list)); - WARN_ON(!list_empty(&devlink->sb_list)); - WARN_ON(!list_empty(&devlink->rate_list)); - WARN_ON(!list_empty(&devlink->linecard_list)); - WARN_ON(!xa_empty(&devlink->ports)); - - xa_destroy(&devlink->snapshot_ids); - xa_destroy(&devlink->ports); - - WARN_ON_ONCE(unregister_netdevice_notifier_net(devlink_net(devlink), - &devlink->netdevice_nb)); - - xa_erase(&devlinks, devlink->index); - - kfree(devlink); -} -EXPORT_SYMBOL_GPL(devlink_free); - static void devlink_port_type_warn(struct work_struct *work) { WARN(true, "Type was not set for devlink port."); @@ -10480,8 +9797,8 @@ void devlink_port_type_clear(struct devlink_port *devlink_port) } EXPORT_SYMBOL_GPL(devlink_port_type_clear); -static int devlink_netdevice_event(struct notifier_block *nb, - unsigned long event, void *ptr) +int devlink_port_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) { struct net_device *netdev = netdev_notifier_info_to_dev(ptr); struct devlink_port *devlink_port = netdev->devlink_port; @@ -11604,8 +10921,6 @@ int devlink_params_register(struct devlink *devlink, const struct devlink_param *param = params; int i, err; - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - for (i = 0; i < params_count; i++, param++) { err = devlink_param_register(devlink, param); if (err) @@ -11636,8 +10951,6 @@ void devlink_params_unregister(struct devlink *devlink, const struct devlink_param *param = params; int i; - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - for (i = 0; i < params_count; i++, param++) devlink_param_unregister(devlink, param); } @@ -11657,8 +10970,6 @@ int devlink_param_register(struct devlink *devlink, { struct devlink_param_item *param_item; - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - WARN_ON(devlink_param_verify(param)); WARN_ON(devlink_param_find_by_name(&devlink->param_list, param->name)); @@ -11674,6 +10985,7 @@ int devlink_param_register(struct devlink *devlink, param_item->param = param; list_add_tail(¶m_item->list, &devlink->param_list); + devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW); return 0; } EXPORT_SYMBOL_GPL(devlink_param_register); @@ -11688,11 +11000,10 @@ void devlink_param_unregister(struct devlink *devlink, { struct devlink_param_item *param_item; - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - param_item = devlink_param_find_by_name(&devlink->param_list, param->name); WARN_ON(!param_item); + devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_DEL); list_del(¶m_item->list); kfree(param_item); } @@ -11752,8 +11063,6 @@ int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, { struct devlink_param_item *param_item; - ASSERT_DEVLINK_NOT_REGISTERED(devlink); - param_item = devlink_param_find_by_id(&devlink->param_list, param_id); if (!param_item) return -EINVAL; @@ -11767,6 +11076,8 @@ int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, else param_item->driverinit_value = init_val; param_item->driverinit_value_valid = true; + + devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW); return 0; } EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set); @@ -12921,7 +12232,8 @@ void devlink_compat_running_version(struct devlink *devlink, return; devl_lock(devlink); - __devlink_compat_running_version(devlink, buf, len); + if (devl_is_registered(devlink)) + __devlink_compat_running_version(devlink, buf, len); devl_unlock(devlink); } @@ -12930,20 +12242,28 @@ int devlink_compat_flash_update(struct devlink *devlink, const char *file_name) struct devlink_flash_update_params params = {}; int ret; - if (!devlink->ops->flash_update) - return -EOPNOTSUPP; + devl_lock(devlink); + if (!devl_is_registered(devlink)) { + ret = -ENODEV; + goto out_unlock; + } + + if (!devlink->ops->flash_update) { + ret = -EOPNOTSUPP; + goto out_unlock; + } ret = request_firmware(¶ms.fw, file_name, devlink->dev); if (ret) - return ret; + goto out_unlock; - devl_lock(devlink); devlink_flash_update_begin_notify(devlink); ret = devlink->ops->flash_update(devlink, ¶ms, NULL); devlink_flash_update_end_notify(devlink); - devl_unlock(devlink); release_firmware(params.fw); +out_unlock: + devl_unlock(devlink); return ret; } @@ -12983,47 +12303,3 @@ int devlink_compat_switch_id_get(struct net_device *dev, return 0; } - -static void __net_exit devlink_pernet_pre_exit(struct net *net) -{ - struct devlink *devlink; - u32 actions_performed; - unsigned long index; - int err; - - /* In case network namespace is getting destroyed, reload - * all devlink instances from this namespace into init_net. - */ - devlinks_xa_for_each_registered_get(net, index, devlink) { - WARN_ON(!(devlink->features & DEVLINK_F_RELOAD)); - mutex_lock(&devlink->lock); - err = devlink_reload(devlink, &init_net, - DEVLINK_RELOAD_ACTION_DRIVER_REINIT, - DEVLINK_RELOAD_LIMIT_UNSPEC, - &actions_performed, NULL); - mutex_unlock(&devlink->lock); - if (err && err != -EOPNOTSUPP) - pr_warn("Failed to reload devlink instance into init_net\n"); - devlink_put(devlink); - } -} - -static struct pernet_operations devlink_pernet_ops __net_initdata = { - .pre_exit = devlink_pernet_pre_exit, -}; - -static int __init devlink_init(void) -{ - int err; - - err = genl_register_family(&devlink_nl_family); - if (err) - goto out; - err = register_pernet_subsys(&devlink_pernet_ops); - -out: - WARN_ON(err); - return err; -} - -subsys_initcall(devlink_init); diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c new file mode 100644 index 000000000000..b5b8ac6db2d1 --- /dev/null +++ b/net/devlink/netlink.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> + */ + +#include <net/genetlink.h> +#include <net/sock.h> + +#include "devl_internal.h" + +static const struct genl_multicast_group devlink_nl_mcgrps[] = { + [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME }, +}; + +static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_UNSPEC] = { .strict_start_type = + DEVLINK_ATTR_TRAP_POLICER_ID }, + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 }, + [DEVLINK_ATTR_PORT_TYPE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_PORT_TYPE_AUTO, + DEVLINK_PORT_TYPE_IB), + [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 }, + [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 }, + [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, + [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_ESWITCH_MODE_LEGACY, + DEVLINK_ESWITCH_MODE_SWITCHDEV), + [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 }, + [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64}, + [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64}, + [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 }, + [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 }, + [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, + [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] = + NLA_POLICY_BITFIELD32(DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS), + [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 }, + [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 }, + [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8 }, + [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 }, + [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 }, + [DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED }, + [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, + DEVLINK_RELOAD_ACTION_MAX), + [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(DEVLINK_RELOAD_LIMITS_VALID_MASK), + [DEVLINK_ATTR_PORT_FLAVOUR] = { .type = NLA_U16 }, + [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 }, + [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 }, + [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 }, + [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 }, + [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 }, + [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 }, + [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 }, + [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED }, + [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 }, + [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 }, + [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG }, +}; + +struct devlink * +devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs) +{ + struct devlink *devlink; + unsigned long index; + char *busname; + char *devname; + + if (!attrs[DEVLINK_ATTR_BUS_NAME] || !attrs[DEVLINK_ATTR_DEV_NAME]) + return ERR_PTR(-EINVAL); + + busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]); + devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); + + devlinks_xa_for_each_registered_get(net, index, devlink) { + devl_lock(devlink); + if (devl_is_registered(devlink) && + strcmp(devlink->dev->bus->name, busname) == 0 && + strcmp(dev_name(devlink->dev), devname) == 0) + return devlink; + devl_unlock(devlink); + devlink_put(devlink); + } + + return ERR_PTR(-ENODEV); +} + +static int devlink_nl_pre_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + struct devlink_linecard *linecard; + struct devlink_port *devlink_port; + struct devlink *devlink; + int err; + + devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs); + if (IS_ERR(devlink)) + return PTR_ERR(devlink); + + info->user_ptr[0] = devlink; + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { + devlink_port = devlink_port_get_from_info(devlink, info); + if (IS_ERR(devlink_port)) { + err = PTR_ERR(devlink_port); + goto unlock; + } + info->user_ptr[1] = devlink_port; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) { + devlink_port = devlink_port_get_from_info(devlink, info); + if (!IS_ERR(devlink_port)) + info->user_ptr[1] = devlink_port; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) { + struct devlink_rate *devlink_rate; + + devlink_rate = devlink_rate_get_from_info(devlink, info); + if (IS_ERR(devlink_rate)) { + err = PTR_ERR(devlink_rate); + goto unlock; + } + info->user_ptr[1] = devlink_rate; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) { + struct devlink_rate *rate_node; + + rate_node = devlink_rate_node_get_from_info(devlink, info); + if (IS_ERR(rate_node)) { + err = PTR_ERR(rate_node); + goto unlock; + } + info->user_ptr[1] = rate_node; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) { + linecard = devlink_linecard_get_from_info(devlink, info); + if (IS_ERR(linecard)) { + err = PTR_ERR(linecard); + goto unlock; + } + info->user_ptr[1] = linecard; + } + return 0; + +unlock: + devl_unlock(devlink); + devlink_put(devlink); + return err; +} + +static void devlink_nl_post_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + struct devlink_linecard *linecard; + struct devlink *devlink; + + devlink = info->user_ptr[0]; + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) { + linecard = info->user_ptr[1]; + devlink_linecard_put(linecard); + } + devl_unlock(devlink); + devlink_put(devlink); +} + +static const struct devlink_gen_cmd *devl_gen_cmds[] = { + [DEVLINK_CMD_GET] = &devl_gen_inst, + [DEVLINK_CMD_PORT_GET] = &devl_gen_port, + [DEVLINK_CMD_SB_GET] = &devl_gen_sb, + [DEVLINK_CMD_SB_POOL_GET] = &devl_gen_sb_pool, + [DEVLINK_CMD_SB_PORT_POOL_GET] = &devl_gen_sb_port_pool, + [DEVLINK_CMD_SB_TC_POOL_BIND_GET] = &devl_gen_sb_tc_pool_bind, + [DEVLINK_CMD_PARAM_GET] = &devl_gen_param, + [DEVLINK_CMD_REGION_GET] = &devl_gen_region, + [DEVLINK_CMD_INFO_GET] = &devl_gen_info, + [DEVLINK_CMD_RATE_GET] = &devl_gen_rate_get, + [DEVLINK_CMD_TRAP_GET] = &devl_gen_trap, + [DEVLINK_CMD_TRAP_GROUP_GET] = &devl_gen_trap_group, + [DEVLINK_CMD_TRAP_POLICER_GET] = &devl_gen_trap_policer, + [DEVLINK_CMD_SELFTESTS_GET] = &devl_gen_selftests, +}; + +int devlink_nl_instance_iter_dump(struct sk_buff *msg, + struct netlink_callback *cb) +{ + const struct genl_dumpit_info *info = genl_dumpit_info(cb); + struct devlink_nl_dump_state *state = devlink_dump_state(cb); + const struct devlink_gen_cmd *cmd; + struct devlink *devlink; + int err = 0; + + cmd = devl_gen_cmds[info->op.cmd]; + + devlink_dump_for_each_instance_get(msg, state, devlink) { + devl_lock(devlink); + + if (devl_is_registered(devlink)) + err = cmd->dump_one(msg, devlink, cb); + else + err = 0; + + devl_unlock(devlink); + devlink_put(devlink); + + if (err) + break; + + /* restart sub-object walk for the next instance */ + state->idx = 0; + } + + if (err != -EMSGSIZE) + return err; + return msg->len; +} + +struct genl_family devlink_nl_family __ro_after_init = { + .name = DEVLINK_GENL_NAME, + .version = DEVLINK_GENL_VERSION, + .maxattr = DEVLINK_ATTR_MAX, + .policy = devlink_nl_policy, + .netnsok = true, + .parallel_ops = true, + .pre_doit = devlink_nl_pre_doit, + .post_doit = devlink_nl_post_doit, + .module = THIS_MODULE, + .small_ops = devlink_nl_ops, + .n_small_ops = ARRAY_SIZE(devlink_nl_ops), + .resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1, + .mcgrps = devlink_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps), +}; diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 228f13df2e18..563864c1bf5a 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -8,4 +8,4 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o module.o \ - pse-pd.o + pse-pd.o plca.o diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 6f399afc2ff2..5fb19050991e 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -208,6 +208,9 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(800000, DR8_2, Full), __DEFINE_LINK_MODE_NAME(800000, SR8, Full), __DEFINE_LINK_MODE_NAME(800000, VR8, Full), + __DEFINE_LINK_MODE_NAME(10, T1S, Full), + __DEFINE_LINK_MODE_NAME(10, T1S, Half), + __DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -244,6 +247,8 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_X 1 #define __LINK_MODE_LANES_FX 1 #define __LINK_MODE_LANES_T1L 1 +#define __LINK_MODE_LANES_T1S 1 +#define __LINK_MODE_LANES_T1S_P2MP 1 #define __LINK_MODE_LANES_VR8 8 #define __LINK_MODE_LANES_DR8_2 8 @@ -366,6 +371,9 @@ const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(800000, DR8_2, Full), __DEFINE_LINK_MODE_PARAMS(800000, SR8, Full), __DEFINE_LINK_MODE_PARAMS(800000, VR8, Full), + __DEFINE_LINK_MODE_PARAMS(10, T1S, Full), + __DEFINE_LINK_MODE_PARAMS(10, T1S, Half), + __DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index aee98be6237f..9f924875bba9 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -288,6 +288,8 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_MODULE_GET] = ðnl_module_request_ops, [ETHTOOL_MSG_PSE_GET] = ðnl_pse_request_ops, [ETHTOOL_MSG_RSS_GET] = ðnl_rss_request_ops, + [ETHTOOL_MSG_PLCA_GET_CFG] = ðnl_plca_cfg_request_ops, + [ETHTOOL_MSG_PLCA_GET_STATUS] = ðnl_plca_status_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -603,6 +605,7 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_EEE_NTF] = ðnl_eee_request_ops, [ETHTOOL_MSG_FEC_NTF] = ðnl_fec_request_ops, [ETHTOOL_MSG_MODULE_NTF] = ðnl_module_request_ops, + [ETHTOOL_MSG_PLCA_NTF] = ðnl_plca_cfg_request_ops, }; /* default notification handler */ @@ -696,6 +699,7 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_EEE_NTF] = ethnl_default_notify, [ETHTOOL_MSG_FEC_NTF] = ethnl_default_notify, [ETHTOOL_MSG_MODULE_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_PLCA_NTF] = ethnl_default_notify, }; void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) @@ -1047,6 +1051,31 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_rss_get_policy, .maxattr = ARRAY_SIZE(ethnl_rss_get_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_PLCA_GET_CFG, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + .policy = ethnl_plca_get_cfg_policy, + .maxattr = ARRAY_SIZE(ethnl_plca_get_cfg_policy) - 1, + }, + { + .cmd = ETHTOOL_MSG_PLCA_SET_CFG, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_plca_cfg, + .policy = ethnl_plca_set_cfg_policy, + .maxattr = ARRAY_SIZE(ethnl_plca_set_cfg_policy) - 1, + }, + { + .cmd = ETHTOOL_MSG_PLCA_GET_STATUS, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + .policy = ethnl_plca_get_status_policy, + .maxattr = ARRAY_SIZE(ethnl_plca_get_status_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 3753787ba233..f271266f6e28 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -347,6 +347,8 @@ extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops; extern const struct ethnl_request_ops ethnl_module_request_ops; extern const struct ethnl_request_ops ethnl_pse_request_ops; extern const struct ethnl_request_ops ethnl_rss_request_ops; +extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops; +extern const struct ethnl_request_ops ethnl_plca_status_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@ -388,6 +390,9 @@ extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MO extern const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1]; extern const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1]; extern const struct nla_policy ethnl_rss_get_policy[ETHTOOL_A_RSS_CONTEXT + 1]; +extern const struct nla_policy ethnl_plca_get_cfg_policy[ETHTOOL_A_PLCA_HEADER + 1]; +extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1]; +extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1]; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); @@ -408,6 +413,7 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info); int ethnl_set_module(struct sk_buff *skb, struct genl_info *info); int ethnl_set_pse(struct sk_buff *skb, struct genl_info *info); +int ethnl_set_plca_cfg(struct sk_buff *skb, struct genl_info *info); extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN]; extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN]; diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c new file mode 100644 index 000000000000..d9bb13ffc654 --- /dev/null +++ b/net/ethtool/plca.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/phy.h> +#include <linux/ethtool_netlink.h> + +#include "netlink.h" +#include "common.h" + +struct plca_req_info { + struct ethnl_req_info base; +}; + +struct plca_reply_data { + struct ethnl_reply_data base; + struct phy_plca_cfg plca_cfg; + struct phy_plca_status plca_st; +}; + +// Helpers ------------------------------------------------------------------ // + +#define PLCA_REPDATA(__reply_base) \ + container_of(__reply_base, struct plca_reply_data, base) + +static void plca_update_sint(int *dst, const struct nlattr *attr, + bool *mod) +{ + if (!attr) + return; + + *dst = nla_get_u32(attr); + *mod = true; +} + +// PLCA get configuration message ------------------------------------------- // + +const struct nla_policy ethnl_plca_get_cfg_policy[] = { + [ETHTOOL_A_PLCA_HEADER] = + NLA_POLICY_NESTED(ethnl_header_policy), +}; + +static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct plca_reply_data *data = PLCA_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + const struct ethtool_phy_ops *ops; + int ret; + + // check that the PHY device is available and connected + if (!dev->phydev) { + ret = -EOPNOTSUPP; + goto out; + } + + // note: rtnl_lock is held already by ethnl_default_doit + ops = ethtool_phy_ops; + if (!ops || !ops->get_plca_cfg) { + ret = -EOPNOTSUPP; + goto out; + } + + ret = ethnl_ops_begin(dev); + if (!ret) + goto out; + + memset(&data->plca_cfg, 0xff, + sizeof_field(struct plca_reply_data, plca_cfg)); + + ret = ops->get_plca_cfg(dev->phydev, &data->plca_cfg); + ethnl_ops_complete(dev); + +out: + return ret; +} + +static int plca_get_cfg_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + return nla_total_size(sizeof(u16)) + /* _VERSION */ + nla_total_size(sizeof(u8)) + /* _ENABLED */ + nla_total_size(sizeof(u32)) + /* _NODE_CNT */ + nla_total_size(sizeof(u32)) + /* _NODE_ID */ + nla_total_size(sizeof(u32)) + /* _TO_TIMER */ + nla_total_size(sizeof(u32)) + /* _BURST_COUNT */ + nla_total_size(sizeof(u32)); /* _BURST_TIMER */ +} + +static int plca_get_cfg_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct plca_reply_data *data = PLCA_REPDATA(reply_base); + const struct phy_plca_cfg *plca = &data->plca_cfg; + + if ((plca->version >= 0 && + nla_put_u16(skb, ETHTOOL_A_PLCA_VERSION, plca->version)) || + (plca->enabled >= 0 && + nla_put_u8(skb, ETHTOOL_A_PLCA_ENABLED, !!plca->enabled)) || + (plca->node_id >= 0 && + nla_put_u32(skb, ETHTOOL_A_PLCA_NODE_ID, plca->node_id)) || + (plca->node_cnt >= 0 && + nla_put_u32(skb, ETHTOOL_A_PLCA_NODE_CNT, plca->node_cnt)) || + (plca->to_tmr >= 0 && + nla_put_u32(skb, ETHTOOL_A_PLCA_TO_TMR, plca->to_tmr)) || + (plca->burst_cnt >= 0 && + nla_put_u32(skb, ETHTOOL_A_PLCA_BURST_CNT, plca->burst_cnt)) || + (plca->burst_tmr >= 0 && + nla_put_u32(skb, ETHTOOL_A_PLCA_BURST_TMR, plca->burst_tmr))) + return -EMSGSIZE; + + return 0; +}; + +const struct ethnl_request_ops ethnl_plca_cfg_request_ops = { + .request_cmd = ETHTOOL_MSG_PLCA_GET_CFG, + .reply_cmd = ETHTOOL_MSG_PLCA_GET_CFG_REPLY, + .hdr_attr = ETHTOOL_A_PLCA_HEADER, + .req_info_size = sizeof(struct plca_req_info), + .reply_data_size = sizeof(struct plca_reply_data), + + .prepare_data = plca_get_cfg_prepare_data, + .reply_size = plca_get_cfg_reply_size, + .fill_reply = plca_get_cfg_fill_reply, +}; + +// PLCA set configuration message ------------------------------------------- // + +const struct nla_policy ethnl_plca_set_cfg_policy[] = { + [ETHTOOL_A_PLCA_HEADER] = + NLA_POLICY_NESTED(ethnl_header_policy), + [ETHTOOL_A_PLCA_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1), + [ETHTOOL_A_PLCA_NODE_ID] = NLA_POLICY_MAX(NLA_U32, 255), + [ETHTOOL_A_PLCA_NODE_CNT] = NLA_POLICY_RANGE(NLA_U32, 1, 255), + [ETHTOOL_A_PLCA_TO_TMR] = NLA_POLICY_MAX(NLA_U32, 255), + [ETHTOOL_A_PLCA_BURST_CNT] = NLA_POLICY_MAX(NLA_U32, 255), + [ETHTOOL_A_PLCA_BURST_TMR] = NLA_POLICY_MAX(NLA_U32, 255), +}; + +int ethnl_set_plca_cfg(struct sk_buff *skb, struct genl_info *info) +{ + struct ethnl_req_info req_info = {}; + struct nlattr **tb = info->attrs; + const struct ethtool_phy_ops *ops; + struct phy_plca_cfg plca_cfg; + struct net_device *dev; + bool mod = false; + int ret; + + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_PLCA_HEADER], + genl_info_net(info), info->extack, + true); + if (!ret) + return ret; + + dev = req_info.dev; + + rtnl_lock(); + + // check that the PHY device is available and connected + if (!dev->phydev) { + ret = -EOPNOTSUPP; + goto out_rtnl; + } + + ops = ethtool_phy_ops; + if (!ops || !ops->set_plca_cfg) { + ret = -EOPNOTSUPP; + goto out_rtnl; + } + + ret = ethnl_ops_begin(dev); + if (!ret) + goto out_rtnl; + + memset(&plca_cfg, 0xff, sizeof(plca_cfg)); + plca_update_sint(&plca_cfg.enabled, tb[ETHTOOL_A_PLCA_ENABLED], &mod); + plca_update_sint(&plca_cfg.node_id, tb[ETHTOOL_A_PLCA_NODE_ID], &mod); + plca_update_sint(&plca_cfg.node_cnt, tb[ETHTOOL_A_PLCA_NODE_CNT], &mod); + plca_update_sint(&plca_cfg.to_tmr, tb[ETHTOOL_A_PLCA_TO_TMR], &mod); + plca_update_sint(&plca_cfg.burst_cnt, tb[ETHTOOL_A_PLCA_BURST_CNT], + &mod); + plca_update_sint(&plca_cfg.burst_tmr, tb[ETHTOOL_A_PLCA_BURST_TMR], + &mod); + + ret = 0; + if (!mod) + goto out_ops; + + ret = ops->set_plca_cfg(dev->phydev, &plca_cfg, info->extack); + if (!ret) + goto out_ops; + + ethtool_notify(dev, ETHTOOL_MSG_PLCA_NTF, NULL); + +out_ops: + ethnl_ops_complete(dev); +out_rtnl: + rtnl_unlock(); + ethnl_parse_header_dev_put(&req_info); + + return ret; +} + +// PLCA get status message -------------------------------------------------- // + +const struct nla_policy ethnl_plca_get_status_policy[] = { + [ETHTOOL_A_PLCA_HEADER] = + NLA_POLICY_NESTED(ethnl_header_policy), +}; + +static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct plca_reply_data *data = PLCA_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + const struct ethtool_phy_ops *ops; + int ret; + + // check that the PHY device is available and connected + if (!dev->phydev) { + ret = -EOPNOTSUPP; + goto out; + } + + // note: rtnl_lock is held already by ethnl_default_doit + ops = ethtool_phy_ops; + if (!ops || !ops->get_plca_status) { + ret = -EOPNOTSUPP; + goto out; + } + + ret = ethnl_ops_begin(dev); + if (!ret) + goto out; + + memset(&data->plca_st, 0xff, + sizeof_field(struct plca_reply_data, plca_st)); + + ret = ops->get_plca_status(dev->phydev, &data->plca_st); + ethnl_ops_complete(dev); +out: + return ret; +} + +static int plca_get_status_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + return nla_total_size(sizeof(u8)); /* _STATUS */ +} + +static int plca_get_status_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct plca_reply_data *data = PLCA_REPDATA(reply_base); + const u8 status = data->plca_st.pst; + + if (nla_put_u8(skb, ETHTOOL_A_PLCA_STATUS, !!status)) + return -EMSGSIZE; + + return 0; +}; + +const struct ethnl_request_ops ethnl_plca_status_request_ops = { + .request_cmd = ETHTOOL_MSG_PLCA_GET_STATUS, + .reply_cmd = ETHTOOL_MSG_PLCA_GET_STATUS_REPLY, + .hdr_attr = ETHTOOL_A_PLCA_HEADER, + .req_info_size = sizeof(struct plca_req_info), + .reply_data_size = sizeof(struct plca_reply_data), + + .prepare_data = plca_get_status_prepare_data, + .reply_size = plca_get_status_reply_size, + .fill_reply = plca_get_status_fill_reply, +}; diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index ff691d9f4a04..b1c028df686e 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -13,7 +13,7 @@ #include <net/rpl.h> struct rpl_iptunnel_encap { - struct ipv6_rpl_sr_hdr srh[0]; + DECLARE_FLEX_ARRAY(struct ipv6_rpl_sr_hdr, srh); }; struct rpl_lwt { diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 5ded85e2c374..b30cea2fbf3f 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1594,8 +1594,7 @@ mp_rst: TCPOLEN_MPTCP_PRIO, opts->backup, TCPOPT_NOP); - MPTCP_INC_STATS(sock_net((const struct sock *)tp), - MPTCP_MIB_MPPRIOTX); + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPPRIOTX); } mp_capable_done: diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 2ea7eae43bdb..b5505b8167f9 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1143,7 +1143,7 @@ void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ss if (!tcp_rtx_and_write_queues_empty(ssk)) { subflow->stale = 1; __mptcp_retransmit_pending_data(sk); - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE); + MPTCP_INC_STATS(net, MPTCP_MIB_SUBFLOWSTALE); } unlock_sock_fast(ssk, slow); @@ -1903,8 +1903,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) } if (token) - return mptcp_userspace_pm_set_flags(sock_net(skb->sk), - token, &addr, &remote, bkup); + return mptcp_userspace_pm_set_flags(net, token, &addr, &remote, bkup); spin_lock_bh(&pernet->lock); entry = __lookup_addr(pernet, &addr.addr, lookup_by_id); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b7ad030dfe89..13595d6dad8c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -923,9 +923,8 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk) static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; - struct sock *sk = (struct sock *)msk; - sock_owned_by_me(sk); + msk_owned_by_me(msk); mptcp_for_each_subflow(msk, subflow) { if (READ_ONCE(subflow->data_avail)) @@ -1408,7 +1407,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) u64 linger_time; long tout = 0; - sock_owned_by_me(sk); + msk_owned_by_me(msk); if (__mptcp_check_fallback(msk)) { if (!msk->first) @@ -1890,7 +1889,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) u32 time, advmss = 1; u64 rtt_us, mstamp; - sock_owned_by_me(sk); + msk_owned_by_me(msk); if (copied <= 0) return; @@ -2217,7 +2216,7 @@ static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk) struct mptcp_subflow_context *subflow; int min_stale_count = INT_MAX; - sock_owned_by_me((const struct sock *)msk); + msk_owned_by_me(msk); if (__mptcp_check_fallback(msk)) return NULL; @@ -2724,8 +2723,8 @@ static int mptcp_init_sock(struct sock *sk) mptcp_ca_reset(sk); sk_sockets_allocated_inc(sk); - sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); - sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]); + sk->sk_rcvbuf = READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]); + sk->sk_sndbuf = READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]); return 0; } @@ -2892,6 +2891,12 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk) return EPOLLIN | EPOLLRDNORM; } +static void mptcp_listen_inuse_dec(struct sock *sk) +{ + if (inet_sk_state_load(sk) == TCP_LISTEN) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); +} + bool __mptcp_close(struct sock *sk, long timeout) { struct mptcp_subflow_context *subflow; @@ -2901,6 +2906,7 @@ bool __mptcp_close(struct sock *sk, long timeout) sk->sk_shutdown = SHUTDOWN_MASK; if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { + mptcp_listen_inuse_dec(sk); inet_sk_state_store(sk, TCP_CLOSE); goto cleanup; } @@ -3001,6 +3007,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) if (msk->fastopening) return 0; + mptcp_listen_inuse_dec(sk); inet_sk_state_store(sk, TCP_CLOSE); mptcp_stop_timer(sk); @@ -3639,12 +3646,13 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, static int mptcp_listen(struct socket *sock, int backlog) { struct mptcp_sock *msk = mptcp_sk(sock->sk); + struct sock *sk = sock->sk; struct socket *ssock; int err; pr_debug("msk=%p", msk); - lock_sock(sock->sk); + lock_sock(sk); ssock = __mptcp_nmpc_socket(msk); if (!ssock) { err = -EINVAL; @@ -3652,18 +3660,20 @@ static int mptcp_listen(struct socket *sock, int backlog) } mptcp_token_destroy(msk); - inet_sk_state_store(sock->sk, TCP_LISTEN); - sock_set_flag(sock->sk, SOCK_RCU_FREE); + inet_sk_state_store(sk, TCP_LISTEN); + sock_set_flag(sk, SOCK_RCU_FREE); err = ssock->ops->listen(ssock, backlog); - inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk)); - if (!err) - mptcp_copy_inaddrs(sock->sk, ssock->sk); + inet_sk_state_store(sk, inet_sk_state_load(ssock->sk)); + if (!err) { + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + mptcp_copy_inaddrs(sk, ssock->sk); + } mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED); unlock: - release_sock(sock->sk); + release_sock(sk); return err; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a0d1658ce59e..5a8af5657796 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -754,7 +754,7 @@ static inline void mptcp_token_init_request(struct request_sock *req) int mptcp_token_new_request(struct request_sock *req); void mptcp_token_destroy_request(struct request_sock *req); -int mptcp_token_new_connect(struct sock *sk); +int mptcp_token_new_connect(struct sock *ssk); void mptcp_token_accept(struct mptcp_subflow_request_sock *r, struct mptcp_sock *msk); bool mptcp_token_exists(u32 token); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index d4b1e6ec1b36..582ed93bcc8a 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -18,7 +18,7 @@ static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) { - sock_owned_by_me((const struct sock *)msk); + msk_owned_by_me(msk); if (likely(!__mptcp_check_fallback(msk))) return NULL; diff --git a/net/mptcp/token.c b/net/mptcp/token.c index 65430f314a68..5bb924534387 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -134,7 +134,7 @@ int mptcp_token_new_request(struct request_sock *req) /** * mptcp_token_new_connect - create new key/idsn/token for subflow - * @sk: the socket that will initiate a connection + * @ssk: the socket that will initiate a connection * * This function is called when a new outgoing mptcp connection is * initiated. @@ -148,11 +148,12 @@ int mptcp_token_new_request(struct request_sock *req) * * returns 0 on success. */ -int mptcp_token_new_connect(struct sock *sk) +int mptcp_token_new_connect(struct sock *ssk) { - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); int retries = MPTCP_TOKEN_MAX_RETRIES; + struct sock *sk = subflow->conn; struct token_bucket *bucket; again: @@ -169,12 +170,13 @@ again: } pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n", - sk, subflow->local_key, subflow->token, subflow->idsn); + ssk, subflow->local_key, subflow->token, subflow->idsn); WRITE_ONCE(msk->token, subflow->token); __sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain); bucket->chain_len++; spin_unlock_bh(&bucket->lock); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); return 0; } @@ -190,8 +192,10 @@ void mptcp_token_accept(struct mptcp_subflow_request_sock *req, struct mptcp_sock *msk) { struct mptcp_subflow_request_sock *pos; + struct sock *sk = (struct sock *)msk; struct token_bucket *bucket; + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); bucket = token_bucket(req->token); spin_lock_bh(&bucket->lock); @@ -370,12 +374,14 @@ void mptcp_token_destroy_request(struct request_sock *req) */ void mptcp_token_destroy(struct mptcp_sock *msk) { + struct sock *sk = (struct sock *)msk; struct token_bucket *bucket; struct mptcp_sock *pos; if (sk_unhashed((struct sock *)msk)) return; + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); bucket = token_bucket(msk->token); spin_lock_bh(&bucket->lock); pos = __token_lookup_msk(bucket, msk->token); diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c index 5d984bec1cd8..0758865ab658 100644 --- a/net/mptcp/token_test.c +++ b/net/mptcp/token_test.c @@ -57,6 +57,9 @@ static struct mptcp_sock *build_msk(struct kunit *test) KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk); refcount_set(&((struct sock *)msk)->sk_refcnt, 1); sock_net_set((struct sock *)msk, &init_net); + + /* be sure the token helpers can dereference sk->sk_prot */ + ((struct sock *)msk)->sk_prot = &tcp_prot; return msk; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1286ae7d4609..90672e293e57 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3866,7 +3866,7 @@ static int __init ctnetlink_init(void) { int ret; - BUILD_BUG_ON(sizeof(struct ctnetlink_list_dump_ctx) > sizeof_field(struct netlink_callback, ctx)); + NL_ASSET_DUMP_CTX_FITS(struct ctnetlink_list_dump_ctx); ret = nfnetlink_subsys_register(&ctnl_subsys); if (ret < 0) { diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index d593d5b6d4b1..19aea7cba26e 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1861,8 +1861,9 @@ static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg, written = transport->stream_enqueue(vsk, msg, len - total_written); } + if (written < 0) { - err = -ENOMEM; + err = written; goto out_err; } diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 727da3c5879b..22039a0a5b35 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -125,21 +125,21 @@ always-y += sockex1_kern.o always-y += sockex2_kern.o always-y += sockex3_kern.o always-y += tracex1_kern.o -always-y += tracex2_kern.o +always-y += tracex2.bpf.o always-y += tracex3_kern.o always-y += tracex4_kern.o always-y += tracex5_kern.o always-y += tracex6_kern.o always-y += tracex7_kern.o always-y += sock_flags_kern.o -always-y += test_probe_write_user_kern.o -always-y += trace_output_kern.o +always-y += test_probe_write_user.bpf.o +always-y += trace_output.bpf.o always-y += tcbpf1_kern.o always-y += tc_l2_redirect_kern.o always-y += lathist_kern.o always-y += offwaketime_kern.o always-y += spintest_kern.o -always-y += map_perf_test_kern.o +always-y += map_perf_test.bpf.o always-y += test_overhead_tp_kern.o always-y += test_overhead_raw_tp_kern.o always-y += test_overhead_kprobe_kern.o @@ -147,7 +147,7 @@ always-y += parse_varlen.o parse_simple.o parse_ldabs.o always-y += test_cgrp2_tc_kern.o always-y += xdp1_kern.o always-y += xdp2_kern.o -always-y += test_current_task_under_cgroup_kern.o +always-y += test_current_task_under_cgroup.bpf.o always-y += trace_event_kern.o always-y += sampleip_kern.o always-y += lwt_len_hist_kern.o diff --git a/samples/bpf/gnu/stubs.h b/samples/bpf/gnu/stubs.h new file mode 100644 index 000000000000..719225b16626 --- /dev/null +++ b/samples/bpf/gnu/stubs.h @@ -0,0 +1 @@ +/* dummy .h to trick /usr/include/features.h to work with 'clang -target bpf' */ diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test.bpf.c index 7342c5b2f278..3cdeba2afe12 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test.bpf.c @@ -4,14 +4,12 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include <linux/skbuff.h> -#include <linux/netdevice.h> +#include "vmlinux.h" +#include <errno.h> #include <linux/version.h> -#include <uapi/linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> -#include "trace_common.h" #define MAX_ENTRIES 1000 #define MAX_NR_CPUS 1024 @@ -102,8 +100,8 @@ struct { __uint(max_entries, MAX_ENTRIES); } lru_hash_lookup_map SEC(".maps"); -SEC("kprobe/" SYSCALL(sys_getuid)) -int stress_hmap(struct pt_regs *ctx) +SEC("ksyscall/getuid") +int BPF_KSYSCALL(stress_hmap) { u32 key = bpf_get_current_pid_tgid(); long init_val = 1; @@ -120,8 +118,8 @@ int stress_hmap(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_geteuid)) -int stress_percpu_hmap(struct pt_regs *ctx) +SEC("ksyscall/geteuid") +int BPF_KSYSCALL(stress_percpu_hmap) { u32 key = bpf_get_current_pid_tgid(); long init_val = 1; @@ -137,8 +135,8 @@ int stress_percpu_hmap(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_getgid)) -int stress_hmap_alloc(struct pt_regs *ctx) +SEC("ksyscall/getgid") +int BPF_KSYSCALL(stress_hmap_alloc) { u32 key = bpf_get_current_pid_tgid(); long init_val = 1; @@ -154,8 +152,8 @@ int stress_hmap_alloc(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_getegid)) -int stress_percpu_hmap_alloc(struct pt_regs *ctx) +SEC("ksyscall/getegid") +int BPF_KSYSCALL(stress_percpu_hmap_alloc) { u32 key = bpf_get_current_pid_tgid(); long init_val = 1; @@ -170,11 +168,10 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx) } return 0; } - -SEC("kprobe/" SYSCALL(sys_connect)) -int stress_lru_hmap_alloc(struct pt_regs *ctx) +SEC("ksyscall/connect") +int BPF_KSYSCALL(stress_lru_hmap_alloc, int fd, struct sockaddr_in *uservaddr, + int addrlen) { - struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx); char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%dn"; union { u16 dst6[8]; @@ -187,14 +184,11 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx) u32 key; }; } test_params; - struct sockaddr_in6 *in6; + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)uservaddr; u16 test_case; - int addrlen, ret; long val = 1; u32 key = 0; - - in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(real_regs); - addrlen = (int)PT_REGS_PARM3_CORE(real_regs); + int ret; if (addrlen != sizeof(*in6)) return 0; @@ -251,8 +245,8 @@ done: return 0; } -SEC("kprobe/" SYSCALL(sys_gettid)) -int stress_lpm_trie_map_alloc(struct pt_regs *ctx) +SEC("ksyscall/gettid") +int BPF_KSYSCALL(stress_lpm_trie_map_alloc) { union { u32 b32[2]; @@ -273,8 +267,8 @@ int stress_lpm_trie_map_alloc(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_getpgid)) -int stress_hash_map_lookup(struct pt_regs *ctx) +SEC("ksyscall/getpgid") +int BPF_KSYSCALL(stress_hash_map_lookup) { u32 key = 1, i; long *value; @@ -286,8 +280,8 @@ int stress_hash_map_lookup(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYSCALL(sys_getppid)) -int stress_array_map_lookup(struct pt_regs *ctx) +SEC("ksyscall/getppid") +int BPF_KSYSCALL(stress_array_map_lookup) { u32 key = 1, i; long *value; diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 1bb53f4b29e1..d2fbcf963cdf 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -443,7 +443,7 @@ int main(int argc, char **argv) if (argc > 4) max_cnt = atoi(argv[4]); - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup.bpf.c index fbd43e2bb4d3..58b9cf7ed659 100644 --- a/samples/bpf/test_current_task_under_cgroup_kern.c +++ b/samples/bpf/test_current_task_under_cgroup.bpf.c @@ -5,12 +5,11 @@ * License as published by the Free Software Foundation. */ -#include <linux/ptrace.h> -#include <uapi/linux/bpf.h> +#include "vmlinux.h" #include <linux/version.h> #include <bpf/bpf_helpers.h> -#include <uapi/linux/utsname.h> -#include "trace_common.h" +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> struct { __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); @@ -27,8 +26,8 @@ struct { } perf_map SEC(".maps"); /* Writes the last PID that called sync to a map at index 0 */ -SEC("kprobe/" SYSCALL(sys_sync)) -int bpf_prog1(struct pt_regs *ctx) +SEC("ksyscall/sync") +int BPF_KSYSCALL(bpf_prog1) { u64 pid = bpf_get_current_pid_tgid(); int idx = 0; diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c index ac251a417f45..9726ed2a8a8b 100644 --- a/samples/bpf/test_current_task_under_cgroup_user.c +++ b/samples/bpf/test_current_task_under_cgroup_user.c @@ -14,14 +14,14 @@ int main(int argc, char **argv) { pid_t remote_pid, local_pid = getpid(); + int cg2 = -1, idx = 0, rc = 1; struct bpf_link *link = NULL; struct bpf_program *prog; - int cg2, idx = 0, rc = 1; struct bpf_object *obj; char filename[256]; int map_fd[2]; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); @@ -103,7 +103,9 @@ int main(int argc, char **argv) rc = 0; err: - close(cg2); + if (cg2 != -1) + close(cg2); + cleanup_cgroup_environment(); cleanup: diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c index 5efb91763d65..1c161276d57b 100644 --- a/samples/bpf/test_lru_dist.c +++ b/samples/bpf/test_lru_dist.c @@ -42,11 +42,6 @@ static inline void INIT_LIST_HEAD(struct list_head *list) list->prev = list; } -static inline int list_empty(const struct list_head *head) -{ - return head->next == head; -} - static inline void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next) diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map_kern.c index b0200c8eac09..0e17f9ade5c5 100644 --- a/samples/bpf/test_map_in_map_kern.c +++ b/samples/bpf/test_map_in_map_kern.c @@ -13,7 +13,6 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> -#include "trace_common.h" #define MAX_NR_PORTS 65536 diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user.bpf.c index 220a96438d75..a4f3798b7fb0 100644 --- a/samples/bpf/test_probe_write_user_kern.c +++ b/samples/bpf/test_probe_write_user.bpf.c @@ -4,14 +4,12 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <uapi/linux/bpf.h> +#include "vmlinux.h" +#include <string.h> #include <linux/version.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> -#include "trace_common.h" struct { __uint(type, BPF_MAP_TYPE_HASH); @@ -28,25 +26,23 @@ struct { * This example sits on a syscall, and the syscall ABI is relatively stable * of course, across platforms, and over time, the ABI may change. */ -SEC("kprobe/" SYSCALL(sys_connect)) -int bpf_prog1(struct pt_regs *ctx) +SEC("ksyscall/connect") +int BPF_KSYSCALL(bpf_prog1, int fd, struct sockaddr_in *uservaddr, + int addrlen) { - struct pt_regs *real_regs = (struct pt_regs *)PT_REGS_PARM1_CORE(ctx); - void *sockaddr_arg = (void *)PT_REGS_PARM2_CORE(real_regs); - int sockaddr_len = (int)PT_REGS_PARM3_CORE(real_regs); struct sockaddr_in new_addr, orig_addr = {}; struct sockaddr_in *mapped_addr; - if (sockaddr_len > sizeof(orig_addr)) + if (addrlen > sizeof(orig_addr)) return 0; - if (bpf_probe_read_user(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0) + if (bpf_probe_read_user(&orig_addr, sizeof(orig_addr), uservaddr) != 0) return 0; mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr); if (mapped_addr != NULL) { memcpy(&new_addr, mapped_addr, sizeof(new_addr)); - bpf_probe_write_user(sockaddr_arg, &new_addr, + bpf_probe_write_user(uservaddr, &new_addr, sizeof(new_addr)); } return 0; diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c index 00ccfb834e45..2a539aec4116 100644 --- a/samples/bpf/test_probe_write_user_user.c +++ b/samples/bpf/test_probe_write_user_user.c @@ -24,7 +24,7 @@ int main(int ac, char **argv) mapped_addr_in = (struct sockaddr_in *)&mapped_addr; tmp_addr_in = (struct sockaddr_in *)&tmp_addr; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/trace_common.h b/samples/bpf/trace_common.h deleted file mode 100644 index 8cb5400aed1f..000000000000 --- a/samples/bpf/trace_common.h +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#ifndef __TRACE_COMMON_H -#define __TRACE_COMMON_H - -#ifdef __x86_64__ -#define SYSCALL(SYS) "__x64_" __stringify(SYS) -#elif defined(__s390x__) -#define SYSCALL(SYS) "__s390x_" __stringify(SYS) -#else -#define SYSCALL(SYS) __stringify(SYS) -#endif - -#endif diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output.bpf.c index b64815af0943..565a73b51b04 100644 --- a/samples/bpf/trace_output_kern.c +++ b/samples/bpf/trace_output.bpf.c @@ -1,8 +1,6 @@ -#include <linux/ptrace.h> +#include "vmlinux.h" #include <linux/version.h> -#include <uapi/linux/bpf.h> #include <bpf/bpf_helpers.h> -#include "trace_common.h" struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); @@ -11,7 +9,7 @@ struct { __uint(max_entries, 2); } my_map SEC(".maps"); -SEC("kprobe/" SYSCALL(sys_write)) +SEC("ksyscall/write") int bpf_prog1(struct pt_regs *ctx) { struct S { diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index 371732f9cf8e..d316fd2c8e24 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -51,7 +51,7 @@ int main(int argc, char **argv) char filename[256]; FILE *f; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2.bpf.c index 93e0b7680b4f..0a5c75b367be 100644 --- a/samples/bpf/tracex2_kern.c +++ b/samples/bpf/tracex2.bpf.c @@ -4,13 +4,11 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include <linux/skbuff.h> -#include <linux/netdevice.h> +#include "vmlinux.h" #include <linux/version.h> -#include <uapi/linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "trace_common.h" +#include <bpf/bpf_core_read.h> struct { __uint(type, BPF_MAP_TYPE_HASH); @@ -78,15 +76,14 @@ struct { __uint(max_entries, 1024); } my_hist_map SEC(".maps"); -SEC("kprobe/" SYSCALL(sys_write)) -int bpf_prog3(struct pt_regs *ctx) +SEC("ksyscall/write") +int BPF_KSYSCALL(bpf_prog3, unsigned int fd, const char *buf, size_t count) { - long write_size = PT_REGS_PARM3(ctx); long init_val = 1; long *value; struct hist_key key; - key.index = log2l(write_size); + key.index = log2l(count); key.pid_tgid = bpf_get_current_pid_tgid(); key.uid_gid = bpf_get_current_uid_gid(); bpf_get_current_comm(&key.comm, sizeof(key.comm)); diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index 089e408abd7a..2131f1648cf1 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -123,7 +123,7 @@ int main(int ac, char **argv) int i, j = 0; FILE *f; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { fprintf(stderr, "ERROR: opening BPF object file failed\n"); diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c index 227b05a0bc88..dee8f0a091ba 100644 --- a/samples/bpf/tracex4_user.c +++ b/samples/bpf/tracex4_user.c @@ -51,7 +51,7 @@ int main(int ac, char **argv) struct bpf_program *prog; struct bpf_object *obj; char filename[256]; - int map_fd, i, j = 0; + int map_fd, j = 0; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); @@ -82,7 +82,7 @@ int main(int ac, char **argv) j++; } - for (i = 0; ; i++) { + while (1) { print_old_objects(map_fd); sleep(1); } diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index f610e184ce02..ab20ecc5acce 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -293,3 +293,6 @@ FORCE: .PHONY: all FORCE bootstrap clean install-bin install uninstall .PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all + +# Delete partially updated (corrupted) files on error +.DELETE_ON_ERROR: diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 19a3112e271a..f7375a119f54 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -56,13 +56,17 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OU DESTDIR=$(LIBBPF_DESTDIR) prefix= EXTRA_CFLAGS="$(CFLAGS)" \ $(abspath $@) install_headers +LIBELF_FLAGS := $(shell $(HOSTPKG_CONFIG) libelf --cflags 2>/dev/null) +LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf) + CFLAGS += -g \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ -I$(LIBBPF_INCLUDE) \ - -I$(SUBCMD_SRC) + -I$(SUBCMD_SRC) \ + $(LIBELF_FLAGS) -LIBS = -lelf -lz +LIBS = $(LIBELF_LIBS) -lz export srctree OUTPUT CFLAGS Q include $(srctree)/tools/build/Makefile.include diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 464ca3f01fe7..bc1a3d232ae4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2001,6 +2001,9 @@ union bpf_attr { * sending the packet. This flag was added for GRE * encapsulation, but might be used with other protocols * as well in the future. + * **BPF_F_NO_TUNNEL_KEY** + * Add a flag to tunnel metadata indicating that no tunnel + * key should be set in the resulting tunnel header. * * Here is a typical usage on the transmit path: * @@ -5764,6 +5767,7 @@ enum { BPF_F_ZERO_CSUM_TX = (1ULL << 1), BPF_F_DONT_FRAGMENT = (1ULL << 2), BPF_F_SEQ_NUMBER = (1ULL << 3), + BPF_F_NO_TUNNEL_KEY = (1ULL << 4), }; /* BPF_FUNC_skb_get_tunnel_key flags. */ diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 2972dc25ff72..bdb0f6b5be84 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -32,6 +32,9 @@ #elif defined(__TARGET_ARCH_arc) #define bpf_target_arc #define bpf_target_defined +#elif defined(__TARGET_ARCH_loongarch) + #define bpf_target_loongarch + #define bpf_target_defined #else /* Fall back to what the compiler says */ @@ -62,6 +65,9 @@ #elif defined(__arc__) #define bpf_target_arc #define bpf_target_defined +#elif defined(__loongarch__) + #define bpf_target_loongarch + #define bpf_target_defined #endif /* no compiler target */ #endif @@ -137,7 +143,7 @@ struct pt_regs___s390 { #define __PT_PARM3_REG gprs[4] #define __PT_PARM4_REG gprs[5] #define __PT_PARM5_REG gprs[6] -#define __PT_RET_REG grps[14] +#define __PT_RET_REG gprs[14] #define __PT_FP_REG gprs[11] /* Works only with CONFIG_FRAME_POINTER */ #define __PT_RC_REG gprs[2] #define __PT_SP_REG gprs[15] @@ -258,6 +264,23 @@ struct pt_regs___arm64 { /* arc does not select ARCH_HAS_SYSCALL_WRAPPER. */ #define PT_REGS_SYSCALL_REGS(ctx) ctx +#elif defined(bpf_target_loongarch) + +/* https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html */ + +#define __PT_PARM1_REG regs[4] +#define __PT_PARM2_REG regs[5] +#define __PT_PARM3_REG regs[6] +#define __PT_PARM4_REG regs[7] +#define __PT_PARM5_REG regs[8] +#define __PT_RET_REG regs[1] +#define __PT_FP_REG regs[22] +#define __PT_RC_REG regs[4] +#define __PT_SP_REG regs[3] +#define __PT_IP_REG csr_era +/* loongarch does not select ARCH_HAS_SYSCALL_WRAPPER. */ +#define PT_REGS_SYSCALL_REGS(ctx) ctx + #endif #if defined(bpf_target_defined) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 71e165b09ed5..64841117fbb2 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -688,8 +688,21 @@ int btf__align_of(const struct btf *btf, __u32 id) if (align <= 0) return libbpf_err(align); max_align = max(max_align, align); + + /* if field offset isn't aligned according to field + * type's alignment, then struct must be packed + */ + if (btf_member_bitfield_size(t, i) == 0 && + (m->offset % (8 * align)) != 0) + return 1; } + /* if struct/union size isn't a multiple of its alignment, + * then struct must be packed + */ + if ((t->size % max_align) != 0) + return 1; + return max_align; } default: @@ -990,7 +1003,8 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, err = 0; if (!btf_data) { - err = -ENOENT; + pr_warn("failed to find '%s' ELF section in %s\n", BTF_ELF_SEC, path); + err = -ENODATA; goto done; } btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index deb2bc9a0a7b..580985ee5545 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -13,6 +13,7 @@ #include <ctype.h> #include <endian.h> #include <errno.h> +#include <limits.h> #include <linux/err.h> #include <linux/btf.h> #include <linux/kernel.h> @@ -833,14 +834,9 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id, const struct btf_type *t) { const struct btf_member *m; - int align, i, bit_sz; + int max_align = 1, align, i, bit_sz; __u16 vlen; - align = btf__align_of(btf, id); - /* size of a non-packed struct has to be a multiple of its alignment*/ - if (align && t->size % align) - return true; - m = btf_members(t); vlen = btf_vlen(t); /* all non-bitfield fields have to be naturally aligned */ @@ -849,8 +845,11 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id, bit_sz = btf_member_bitfield_size(t, i); if (align && bit_sz == 0 && m->offset % (8 * align) != 0) return true; + max_align = max(align, max_align); } - + /* size of a non-packed struct has to be a multiple of its alignment */ + if (t->size % max_align != 0) + return true; /* * if original struct was marked as packed, but its layout is * naturally aligned, we'll detect that it's not packed @@ -858,44 +857,97 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id, return false; } -static int chip_away_bits(int total, int at_most) -{ - return total % at_most ? : at_most; -} - static void btf_dump_emit_bit_padding(const struct btf_dump *d, - int cur_off, int m_off, int m_bit_sz, - int align, int lvl) + int cur_off, int next_off, int next_align, + bool in_bitfield, int lvl) { - int off_diff = m_off - cur_off; - int ptr_bits = d->ptr_sz * 8; + const struct { + const char *name; + int bits; + } pads[] = { + {"long", d->ptr_sz * 8}, {"int", 32}, {"short", 16}, {"char", 8} + }; + int new_off, pad_bits, bits, i; + const char *pad_type; + + if (cur_off >= next_off) + return; /* no gap */ + + /* For filling out padding we want to take advantage of + * natural alignment rules to minimize unnecessary explicit + * padding. First, we find the largest type (among long, int, + * short, or char) that can be used to force naturally aligned + * boundary. Once determined, we'll use such type to fill in + * the remaining padding gap. In some cases we can rely on + * compiler filling some gaps, but sometimes we need to force + * alignment to close natural alignment with markers like + * `long: 0` (this is always the case for bitfields). Note + * that even if struct itself has, let's say 4-byte alignment + * (i.e., it only uses up to int-aligned types), using `long: + * X;` explicit padding doesn't actually change struct's + * overall alignment requirements, but compiler does take into + * account that type's (long, in this example) natural + * alignment requirements when adding implicit padding. We use + * this fact heavily and don't worry about ruining correct + * struct alignment requirement. + */ + for (i = 0; i < ARRAY_SIZE(pads); i++) { + pad_bits = pads[i].bits; + pad_type = pads[i].name; - if (off_diff <= 0) - /* no gap */ - return; - if (m_bit_sz == 0 && off_diff < align * 8) - /* natural padding will take care of a gap */ - return; + new_off = roundup(cur_off, pad_bits); + if (new_off <= next_off) + break; + } - while (off_diff > 0) { - const char *pad_type; - int pad_bits; - - if (ptr_bits > 32 && off_diff > 32) { - pad_type = "long"; - pad_bits = chip_away_bits(off_diff, ptr_bits); - } else if (off_diff > 16) { - pad_type = "int"; - pad_bits = chip_away_bits(off_diff, 32); - } else if (off_diff > 8) { - pad_type = "short"; - pad_bits = chip_away_bits(off_diff, 16); - } else { - pad_type = "char"; - pad_bits = chip_away_bits(off_diff, 8); + if (new_off > cur_off && new_off <= next_off) { + /* We need explicit `<type>: 0` aligning mark if next + * field is right on alignment offset and its + * alignment requirement is less strict than <type>'s + * alignment (so compiler won't naturally align to the + * offset we expect), or if subsequent `<type>: X`, + * will actually completely fit in the remaining hole, + * making compiler basically ignore `<type>: X` + * completely. + */ + if (in_bitfield || + (new_off == next_off && roundup(cur_off, next_align * 8) != new_off) || + (new_off != next_off && next_off - new_off <= new_off - cur_off)) + /* but for bitfields we'll emit explicit bit count */ + btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, + in_bitfield ? new_off - cur_off : 0); + cur_off = new_off; + } + + /* Now we know we start at naturally aligned offset for a chosen + * padding type (long, int, short, or char), and so the rest is just + * a straightforward filling of remaining padding gap with full + * `<type>: sizeof(<type>);` markers, except for the last one, which + * might need smaller than sizeof(<type>) padding. + */ + while (cur_off != next_off) { + bits = min(next_off - cur_off, pad_bits); + if (bits == pad_bits) { + btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits); + cur_off += bits; + continue; + } + /* For the remainder padding that doesn't cover entire + * pad_type bit length, we pick the smallest necessary type. + * This is pure aesthetics, we could have just used `long`, + * but having smallest necessary one communicates better the + * scale of the padding gap. + */ + for (i = ARRAY_SIZE(pads) - 1; i >= 0; i--) { + pad_type = pads[i].name; + pad_bits = pads[i].bits; + if (pad_bits < bits) + continue; + + btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, bits); + cur_off += bits; + break; } - btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits); - off_diff -= pad_bits; } } @@ -915,9 +967,11 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, { const struct btf_member *m = btf_members(t); bool is_struct = btf_is_struct(t); - int align, i, packed, off = 0; + bool packed, prev_bitfield = false; + int align, i, off = 0; __u16 vlen = btf_vlen(t); + align = btf__align_of(d->btf, id); packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0; btf_dump_printf(d, "%s%s%s {", @@ -927,41 +981,47 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, for (i = 0; i < vlen; i++, m++) { const char *fname; - int m_off, m_sz; + int m_off, m_sz, m_align; + bool in_bitfield; fname = btf_name_of(d, m->name_off); m_sz = btf_member_bitfield_size(t, i); m_off = btf_member_bit_offset(t, i); - align = packed ? 1 : btf__align_of(d->btf, m->type); + m_align = packed ? 1 : btf__align_of(d->btf, m->type); + + in_bitfield = prev_bitfield && m_sz != 0; - btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1); + btf_dump_emit_bit_padding(d, off, m_off, m_align, in_bitfield, lvl + 1); btf_dump_printf(d, "\n%s", pfx(lvl + 1)); btf_dump_emit_type_decl(d, m->type, fname, lvl + 1); if (m_sz) { btf_dump_printf(d, ": %d", m_sz); off = m_off + m_sz; + prev_bitfield = true; } else { m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type)); off = m_off + m_sz * 8; + prev_bitfield = false; } + btf_dump_printf(d, ";"); } /* pad at the end, if necessary */ - if (is_struct) { - align = packed ? 1 : btf__align_of(d->btf, id); - btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align, - lvl + 1); - } + if (is_struct) + btf_dump_emit_bit_padding(d, off, t->size * 8, align, false, lvl + 1); /* * Keep `struct empty {}` on a single line, * only print newline when there are regular or padding fields. */ - if (vlen || t->size) + if (vlen || t->size) { btf_dump_printf(d, "\n"); - btf_dump_printf(d, "%s}", pfx(lvl)); + btf_dump_printf(d, "%s}", pfx(lvl)); + } else { + btf_dump_printf(d, "}"); + } if (packed) btf_dump_printf(d, " __attribute__((packed))"); } @@ -1073,6 +1133,43 @@ static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, else btf_dump_emit_enum64_val(d, t, lvl, vlen); btf_dump_printf(d, "\n%s}", pfx(lvl)); + + /* special case enums with special sizes */ + if (t->size == 1) { + /* one-byte enums can be forced with mode(byte) attribute */ + btf_dump_printf(d, " __attribute__((mode(byte)))"); + } else if (t->size == 8 && d->ptr_sz == 8) { + /* enum can be 8-byte sized if one of the enumerator values + * doesn't fit in 32-bit integer, or by adding mode(word) + * attribute (but probably only on 64-bit architectures); do + * our best here to try to satisfy the contract without adding + * unnecessary attributes + */ + bool needs_word_mode; + + if (btf_is_enum(t)) { + /* enum can't represent 64-bit values, so we need word mode */ + needs_word_mode = true; + } else { + /* enum64 needs mode(word) if none of its values has + * non-zero upper 32-bits (which means that all values + * fit in 32-bit integers and won't cause compiler to + * bump enum to be 64-bit naturally + */ + int i; + + needs_word_mode = true; + for (i = 0; i < vlen; i++) { + if (btf_enum64(t)[i].val_hi32 != 0) { + needs_word_mode = false; + break; + } + } + } + if (needs_word_mode) + btf_dump_printf(d, " __attribute__((mode(word)))"); + } + } static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2a82f49ce16f..a5c67a3c93c5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9903,7 +9903,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, char errmsg[STRERR_BUFSIZE]; int type, pfd; - if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) + if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) return -EINVAL; memset(&attr, 0, attr_sz); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index eee883f007f9..898db26e42e9 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -96,6 +96,12 @@ enum libbpf_print_level { typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level, const char *, va_list ap); +/** + * @brief **libbpf_set_print()** sets user-provided log callback function to + * be used for libbpf warnings and informational messages. + * @param fn The log print function. If NULL, libbpf won't print anything. + * @return Pointer to old print function. + */ LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn); /* Hide internal to user */ @@ -174,6 +180,14 @@ struct bpf_object_open_opts { }; #define bpf_object_open_opts__last_field kernel_log_level +/** + * @brief **bpf_object__open()** creates a bpf_object by opening + * the BPF ELF object file pointed to by the passed path and loading it + * into memory. + * @param path BPF object file path. + * @return pointer to the new bpf_object; or NULL is returned on error, + * error code is stored in errno + */ LIBBPF_API struct bpf_object *bpf_object__open(const char *path); /** @@ -203,10 +217,21 @@ LIBBPF_API struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts); -/* Load/unload object into/from kernel */ +/** + * @brief **bpf_object__load()** loads BPF object into kernel. + * @param obj Pointer to a valid BPF object instance returned by + * **bpf_object__open*()** APIs + * @return 0, on success; negative error code, otherwise, error code is + * stored in errno + */ LIBBPF_API int bpf_object__load(struct bpf_object *obj); -LIBBPF_API void bpf_object__close(struct bpf_object *object); +/** + * @brief **bpf_object__close()** closes a BPF object and releases all + * resources. + * @param obj Pointer to a valid BPF object + */ +LIBBPF_API void bpf_object__close(struct bpf_object *obj); /* pin_maps and unpin_maps can both be called with a NULL path, in which case * they will use the pin_path attribute of each map (and ignore all maps that diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 71bf5691a689..11c36a3c1a9f 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -382,3 +382,6 @@ LIBBPF_1.1.0 { user_ring_buffer__reserve_blocking; user_ring_buffer__submit; } LIBBPF_1.0.0; + +LIBBPF_1.2.0 { +} LIBBPF_1.1.0; diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c index 96f67a772a1b..6b180172ec6b 100644 --- a/tools/lib/bpf/libbpf_errno.c +++ b/tools/lib/bpf/libbpf_errno.c @@ -39,14 +39,14 @@ static const char *libbpf_strerror_table[NR_ERRNO] = { int libbpf_strerror(int err, char *buf, size_t size) { + int ret; + if (!buf || !size) return libbpf_err(-EINVAL); err = err > 0 ? err : -err; if (err < __LIBBPF_ERRNO__START) { - int ret; - ret = strerror_r(err, buf, size); buf[size - 1] = '\0'; return libbpf_err_errno(ret); @@ -56,12 +56,20 @@ int libbpf_strerror(int err, char *buf, size_t size) const char *msg; msg = libbpf_strerror_table[ERRNO_OFFSET(err)]; - snprintf(buf, size, "%s", msg); + ret = snprintf(buf, size, "%s", msg); buf[size - 1] = '\0'; + /* The length of the buf and msg is positive. + * A negative number may be returned only when the + * size exceeds INT_MAX. Not likely to appear. + */ + if (ret >= size) + return libbpf_err(-ERANGE); return 0; } - snprintf(buf, size, "Unknown libbpf error %d", err); + ret = snprintf(buf, size, "Unknown libbpf error %d", err); buf[size - 1] = '\0'; + if (ret >= size) + return libbpf_err(-ERANGE); return libbpf_err(-ENOENT); } diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 377642ff51fc..e4d05662a96c 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -543,6 +543,7 @@ static inline int ensure_good_fd(int fd) fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); saved_errno = errno; close(old_fd); + errno = saved_errno; if (fd < 0) { pr_warn("failed to dup FD %d to FD > 2: %d\n", old_fd, -saved_errno); errno = saved_errno; diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index e944f5bce728..1fd2eeac5cfc 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@ #define __LIBBPF_VERSION_H #define LIBBPF_MAJOR_VERSION 1 -#define LIBBPF_MINOR_VERSION 1 +#define LIBBPF_MINOR_VERSION 2 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 3fc3e54b19aa..96e8371f5c2a 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -27,6 +27,7 @@ get_func_args_test # trampoline get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (trampoline) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) htab_update # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +jit_probe_mem # jit_probe_mem__open_and_load unexpected error: -524 (kfunc) kfree_skb # attach fentry unexpected error: -524 (trampoline) kfunc_call # 'bpf_prog_active': not found in kernel BTF (?) kfunc_dynptr_param # JIT does not support calling kernel function (kfunc) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index c22c43bbee19..205e8c3c346a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -626,3 +626,6 @@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ liburandom_read.so) .PHONY: docs docs-clean + +# Delete partially updated (corrupted) files on error +.DELETE_ON_ERROR: diff --git a/tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c b/tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c new file mode 100644 index 000000000000..5639428607e6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <network_helpers.h> + +#include "jit_probe_mem.skel.h" + +void test_jit_probe_mem(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct jit_probe_mem *skel; + int ret; + + skel = jit_probe_mem__open_and_load(); + if (!ASSERT_OK_PTR(skel, "jit_probe_mem__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_jit_probe_mem), &opts); + ASSERT_OK(ret, "jit_probe_mem ret"); + ASSERT_OK(opts.retval, "jit_probe_mem opts.retval"); + ASSERT_EQ(skel->data->total_sum, 192, "jit_probe_mem total_sum"); + + jit_probe_mem__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c index e5560a656030..e01690618e1e 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c @@ -53,7 +53,7 @@ struct bitfields_only_mixed_types { */ /* ------ END-EXPECTED-OUTPUT ------ */ struct bitfield_mixed_with_others { - long: 4; /* char is enough as a backing field */ + char: 4; /* char is enough as a backing field */ int a: 4; /* 8-bit implicit padding */ short b; /* combined with previous bitfield */ diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c index e304b6204bd9..7998f27df7dd 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c @@ -58,7 +58,81 @@ union jump_code_union { } __attribute__((packed)); }; -/*------ END-EXPECTED-OUTPUT ------ */ +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *struct nested_packed_but_aligned_struct { + * int x1; + * int x2; + *}; + * + *struct outer_implicitly_packed_struct { + * char y1; + * struct nested_packed_but_aligned_struct y2; + *} __attribute__((packed)); + * + */ +/* ------ END-EXPECTED-OUTPUT ------ */ + +struct nested_packed_but_aligned_struct { + int x1; + int x2; +} __attribute__((packed)); + +struct outer_implicitly_packed_struct { + char y1; + struct nested_packed_but_aligned_struct y2; +}; +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *struct usb_ss_ep_comp_descriptor { + * char: 8; + * char bDescriptorType; + * char bMaxBurst; + * short wBytesPerInterval; + *}; + * + *struct usb_host_endpoint { + * long: 64; + * char: 8; + * struct usb_ss_ep_comp_descriptor ss_ep_comp; + * long: 0; + *} __attribute__((packed)); + * + */ +/* ------ END-EXPECTED-OUTPUT ------ */ + +struct usb_ss_ep_comp_descriptor { + char: 8; + char bDescriptorType; + char bMaxBurst; + int: 0; + short wBytesPerInterval; +} __attribute__((packed)); + +struct usb_host_endpoint { + long: 64; + char: 8; + struct usb_ss_ep_comp_descriptor ss_ep_comp; + long: 0; +}; + +/* ----- START-EXPECTED-OUTPUT ----- */ +struct nested_packed_struct { + int a; + char b; +} __attribute__((packed)); + +struct outer_nonpacked_struct { + short a; + struct nested_packed_struct b; +}; + +struct outer_packed_struct { + short a; + struct nested_packed_struct b; +} __attribute__((packed)); + +/* ------ END-EXPECTED-OUTPUT ------ */ int f(struct { struct packed_trailing_space _1; @@ -69,6 +143,10 @@ int f(struct { union union_is_never_packed _6; union union_does_not_need_packing _7; union jump_code_union _8; + struct outer_implicitly_packed_struct _9; + struct usb_host_endpoint _10; + struct outer_nonpacked_struct _11; + struct outer_packed_struct _12; } *_) { return 0; diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c index 7cb522d22a66..79276fbe454a 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c @@ -19,7 +19,7 @@ struct padded_implicitly { /* *struct padded_explicitly { * int a; - * int: 32; + * long: 0; * int b; *}; * @@ -28,41 +28,28 @@ struct padded_implicitly { struct padded_explicitly { int a; - int: 1; /* algo will explicitly pad with full 32 bits here */ + int: 1; /* algo will emit aligning `long: 0;` here */ int b; }; /* ----- START-EXPECTED-OUTPUT ----- */ -/* - *struct padded_a_lot { - * int a; - * long: 32; - * long: 64; - * long: 64; - * int b; - *}; - * - */ -/* ------ END-EXPECTED-OUTPUT ------ */ - struct padded_a_lot { int a; - /* 32 bit of implicit padding here, which algo will make explicit */ long: 64; long: 64; int b; }; +/* ------ END-EXPECTED-OUTPUT ------ */ + /* ----- START-EXPECTED-OUTPUT ----- */ /* *struct padded_cache_line { * int a; - * long: 32; * long: 64; * long: 64; * long: 64; * int b; - * long: 32; * long: 64; * long: 64; * long: 64; @@ -85,7 +72,7 @@ struct padded_cache_line { *struct zone { * int a; * short b; - * short: 16; + * long: 0; * struct zone_padding __pad__; *}; * @@ -108,6 +95,131 @@ struct padding_wo_named_members { long: 64; }; +struct padding_weird_1 { + int a; + long: 64; + short: 16; + short b; +}; + +/* ------ END-EXPECTED-OUTPUT ------ */ + +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *struct padding_weird_2 { + * long: 56; + * char a; + * long: 56; + * char b; + * char: 8; + *}; + * + */ +/* ------ END-EXPECTED-OUTPUT ------ */ +struct padding_weird_2 { + int: 32; /* these paddings will be collapsed into `long: 56;` */ + short: 16; + char: 8; + char a; + int: 32; /* these paddings will be collapsed into `long: 56;` */ + short: 16; + char: 8; + char b; + char: 8; +}; + +/* ----- START-EXPECTED-OUTPUT ----- */ +struct exact_1byte { + char x; +}; + +struct padded_1byte { + char: 8; +}; + +struct exact_2bytes { + short x; +}; + +struct padded_2bytes { + short: 16; +}; + +struct exact_4bytes { + int x; +}; + +struct padded_4bytes { + int: 32; +}; + +struct exact_8bytes { + long x; +}; + +struct padded_8bytes { + long: 64; +}; + +struct ff_periodic_effect { + int: 32; + short magnitude; + long: 0; + short phase; + long: 0; + int: 32; + int custom_len; + short *custom_data; +}; + +struct ib_wc { + long: 64; + long: 64; + int: 32; + int byte_len; + void *qp; + union {} ex; + long: 64; + int slid; + int wc_flags; + long: 64; + char smac[6]; + long: 0; + char network_hdr_type; +}; + +struct acpi_object_method { + long: 64; + char: 8; + char type; + short reference_count; + char flags; + short: 0; + char: 8; + char sync_level; + long: 64; + void *node; + void *aml_start; + union {} dispatch; + long: 64; + int aml_length; +}; + +struct nested_unpacked { + int x; +}; + +struct nested_packed { + struct nested_unpacked a; + char c; +} __attribute__((packed)); + +struct outer_mixed_but_unpacked { + struct nested_packed b1; + short a1; + struct nested_packed b2; +}; + /* ------ END-EXPECTED-OUTPUT ------ */ int f(struct { @@ -117,6 +229,20 @@ int f(struct { struct padded_cache_line _4; struct zone _5; struct padding_wo_named_members _6; + struct padding_weird_1 _7; + struct padding_weird_2 _8; + struct exact_1byte _100; + struct padded_1byte _101; + struct exact_2bytes _102; + struct padded_2bytes _103; + struct exact_4bytes _104; + struct padded_4bytes _105; + struct exact_8bytes _106; + struct padded_8bytes _107; + struct ff_periodic_effect _200; + struct ib_wc _201; + struct acpi_object_method _202; + struct outer_mixed_but_unpacked _203; } *_) { return 0; diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index 4ee4748133fe..26fffb02ed10 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -25,6 +25,39 @@ typedef enum { H = 2, } e3_t; +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *enum e_byte { + * EBYTE_1 = 0, + * EBYTE_2 = 1, + *} __attribute__((mode(byte))); + * + */ +/* ----- END-EXPECTED-OUTPUT ----- */ +enum e_byte { + EBYTE_1, + EBYTE_2, +} __attribute__((mode(byte))); + +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *enum e_word { + * EWORD_1 = 0LL, + * EWORD_2 = 1LL, + *} __attribute__((mode(word))); + * + */ +/* ----- END-EXPECTED-OUTPUT ----- */ +enum e_word { + EWORD_1, + EWORD_2, +} __attribute__((mode(word))); /* force to use 8-byte backing for this enum */ + +/* ----- START-EXPECTED-OUTPUT ----- */ +enum e_big { + EBIG_1 = 1000000000000ULL, +}; + typedef int int_t; typedef volatile const int * volatile const crazy_ptr_t; @@ -224,6 +257,9 @@ struct root_struct { enum e2 _2; e2_t _2_1; e3_t _2_2; + enum e_byte _100; + enum e_word _101; + enum e_big _102; struct struct_w_typedefs _3; anon_struct_t _7; struct struct_fwd *_8; diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c new file mode 100644 index 000000000000..2d2e61470794 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +static struct prog_test_ref_kfunc __kptr_ref *v; +long total_sum = -1; + +extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; +extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; + +SEC("tc") +int test_jit_probe_mem(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p; + unsigned long zero = 0, sum; + + p = bpf_kfunc_call_test_acquire(&zero); + if (!p) + return 1; + + p = bpf_kptr_xchg(&v, p); + if (p) + goto release_out; + + /* Direct map value access of kptr, should be PTR_UNTRUSTED */ + p = v; + if (!p) + return 1; + + asm volatile ( + "r9 = %[p];" + "%[sum] = 0;" + + /* r8 = p->a */ + "r8 = *(u32 *)(r9 + 0);" + "%[sum] += r8;" + + /* r8 = p->b */ + "r8 = *(u32 *)(r9 + 4);" + "%[sum] += r8;" + + "r9 += 8;" + /* r9 = p->a */ + "r9 = *(u32 *)(r9 - 8);" + "%[sum] += r9;" + + : [sum] "=r"(sum) + : [p] "r"(p) + : "r8", "r9" + ); + + total_sum = sum; + return 0; +release_out: + bpf_kfunc_call_test_release(p); + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 98af55f0bcd3..508da4a23c4f 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -82,6 +82,27 @@ int gre_set_tunnel(struct __sk_buff *skb) } SEC("tc") +int gre_set_tunnel_no_key(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER | + BPF_F_NO_TUNNEL_KEY); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("tc") int gre_get_tunnel(struct __sk_buff *skb) { int ret; diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index 2eaedc1d9ed3..06857b689c11 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -66,15 +66,20 @@ config_device() add_gre_tunnel() { + tun_key= + if [ -n "$1" ]; then + tun_key="key $1" + fi + # at_ns0 namespace ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ + ip link add dev $DEV_NS type $TYPE seq $tun_key \ local 172.16.1.100 remote 172.16.1.200 ip netns exec at_ns0 ip link set dev $DEV_NS up ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 # root namespace - ip link add dev $DEV type $TYPE key 2 external + ip link add dev $DEV type $TYPE $tun_key external ip link set dev $DEV up ip addr add dev $DEV 10.1.1.200/24 } @@ -238,7 +243,7 @@ test_gre() check $TYPE config_device - add_gre_tunnel + add_gre_tunnel 2 attach_bpf $DEV gre_set_tunnel gre_get_tunnel ping $PING_ARG 10.1.1.100 check_err $? @@ -253,6 +258,30 @@ test_gre() echo -e ${GREEN}"PASS: $TYPE"${NC} } +test_gre_no_tunnel_key() +{ + TYPE=gre + DEV_NS=gre00 + DEV=gre11 + ret=0 + + check $TYPE + config_device + add_gre_tunnel + attach_bpf $DEV gre_set_tunnel_no_key gre_get_tunnel + ping $PING_ARG 10.1.1.100 + check_err $? + ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 + check_err $? + cleanup + + if [ $ret -ne 0 ]; then + echo -e ${RED}"FAIL: $TYPE"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: $TYPE"${NC} +} + test_ip6gre() { TYPE=ip6gre @@ -589,6 +618,7 @@ cleanup() ip link del ipip6tnl11 2> /dev/null ip link del ip6ip6tnl11 2> /dev/null ip link del gretap11 2> /dev/null + ip link del gre11 2> /dev/null ip link del ip6gre11 2> /dev/null ip link del ip6gretap11 2> /dev/null ip link del geneve11 2> /dev/null @@ -641,6 +671,10 @@ bpf_tunnel_test() test_gre errors=$(( $errors + $? )) + echo "Testing GRE tunnel (without tunnel keys)..." + test_gre_no_tunnel_key + errors=$(( $errors + $? )) + echo "Testing IP6GRE tunnel..." test_ip6gre errors=$(( $errors + $? )) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 24bcd7b9bdb2..ef628b16fe9b 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -17,6 +17,11 @@ flush_pids() sleep 1.1 ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null + + for _ in $(seq 10); do + [ -z "$(ip netns pids "${ns}")" ] && break + sleep 0.1 + done } cleanup() @@ -37,15 +42,20 @@ if [ $? -ne 0 ];then exit $ksft_skip fi +get_msk_inuse() +{ + ip netns exec $ns cat /proc/net/protocols | awk '$1~/^MPTCP$/{print $3}' +} + __chk_nr() { - local condition="$1" + local command="$1" local expected=$2 local msg nr shift 2 msg=$* - nr=$(ss -inmHMN $ns | $condition) + nr=$(eval $command) printf "%-50s" "$msg" if [ $nr != $expected ]; then @@ -57,9 +67,17 @@ __chk_nr() test_cnt=$((test_cnt+1)) } +__chk_msk_nr() +{ + local condition=$1 + shift 1 + + __chk_nr "ss -inmHMN $ns | $condition" $* +} + chk_msk_nr() { - __chk_nr "grep -c token:" $* + __chk_msk_nr "grep -c token:" $* } wait_msk_nr() @@ -97,12 +115,12 @@ wait_msk_nr() chk_msk_fallback_nr() { - __chk_nr "grep -c fallback" $* + __chk_msk_nr "grep -c fallback" $* } chk_msk_remote_key_nr() { - __chk_nr "grep -c remote_key" $* + __chk_msk_nr "grep -c remote_key" $* } __chk_listen() @@ -142,6 +160,26 @@ chk_msk_listen() nr=$(ss -Ml $filter | wc -l) } +chk_msk_inuse() +{ + local expected=$1 + local listen_nr + + shift 1 + + listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN) + expected=$((expected + listen_nr)) + + for _ in $(seq 10); do + if [ $(get_msk_inuse) -eq $expected ];then + break + fi + sleep 0.1 + done + + __chk_nr get_msk_inuse $expected $* +} + # $1: ns, $2: port wait_local_port_listen() { @@ -195,8 +233,10 @@ wait_connected $ns 10000 chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" +chk_msk_inuse 2 "....chk 2 msk in use" flush_pids +chk_msk_inuse 0 "....chk 0 msk in use after flush" echo "a" | \ timeout ${timeout_test} \ @@ -211,8 +251,11 @@ echo "b" | \ 127.0.0.1 >/dev/null & wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" +chk_msk_inuse 1 "....chk 1 msk in use" flush_pids +chk_msk_inuse 0 "....chk 0 msk in use after flush" + NR_CLIENTS=100 for I in `seq 1 $NR_CLIENTS`; do echo "a" | \ @@ -232,6 +275,9 @@ for I in `seq 1 $NR_CLIENTS`; do done wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" +chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use" flush_pids +chk_msk_inuse 0 "....chk 0 msk in use after flush" + exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 8a8266957bc5..b25a31445ded 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -627,7 +627,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, char rbuf[8192]; ssize_t len; - if (fds.events == 0) + if (fds.events == 0 || quit) break; switch (poll(&fds, 1, poll_timeout)) { @@ -733,7 +733,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, } /* leave some time for late join/announce */ - if (cfg_remove) + if (cfg_remove && !quit) usleep(cfg_wait); return 0; diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile index f8293c6910c9..43a254f0e14d 100644 --- a/tools/testing/vsock/Makefile +++ b/tools/testing/vsock/Makefile @@ -1,8 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only -all: test +all: test vsock_perf test: vsock_test vsock_diag_test vsock_test: vsock_test.o timeout.o control.o util.o vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o +vsock_perf: vsock_perf.o CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE .PHONY: all test clean diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README index 4d5045e7d2c3..84ee217ba8ee 100644 --- a/tools/testing/vsock/README +++ b/tools/testing/vsock/README @@ -35,3 +35,37 @@ Invoke test binaries in both directions as follows: --control-port=$GUEST_IP \ --control-port=1234 \ --peer-cid=3 + +vsock_perf utility +------------------- +'vsock_perf' is a simple tool to measure vsock performance. It works in +sender/receiver modes: sender connect to peer at the specified port and +starts data transmission to the receiver. After data processing is done, +it prints several metrics(see below). + +Usage: +# run as sender +# connect to CID 2, port 1234, send 1G of data, tx buf size is 1M +./vsock_perf --sender 2 --port 1234 --bytes 1G --buf-size 1M + +Output: +tx performance: A Gbits/s + +Output explanation: +A is calculated as "number of bits to send" / "time in tx loop" + +# run as receiver +# listen port 1234, rx buf size is 1M, socket buf size is 1G, SO_RCVLOWAT is 64K +./vsock_perf --port 1234 --buf-size 1M --vsk-size 1G --rcvlowat 64K + +Output: +rx performance: A Gbits/s +total in 'read()': B sec +POLLIN wakeups: C +average in 'read()': D ns + +Output explanation: +A is calculated as "number of received bits" / "time in rx loop". +B is time, spent in 'read()' system call(excluding 'poll()') +C is number of 'poll()' wake ups with POLLIN bit set. +D is B / C, e.g. average amount of time, spent in single 'read()'. diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c index 4874872fc5a3..d2deb4b15b94 100644 --- a/tools/testing/vsock/control.c +++ b/tools/testing/vsock/control.c @@ -141,6 +141,34 @@ void control_writeln(const char *str) timeout_end(); } +void control_writeulong(unsigned long value) +{ + char str[32]; + + if (snprintf(str, sizeof(str), "%lu", value) >= sizeof(str)) { + perror("snprintf"); + exit(EXIT_FAILURE); + } + + control_writeln(str); +} + +unsigned long control_readulong(void) +{ + unsigned long value; + char *str; + + str = control_readln(); + + if (!str) + exit(EXIT_FAILURE); + + value = strtoul(str, NULL, 10); + free(str); + + return value; +} + /* Return the next line from the control socket (without the trailing newline). * * The program terminates if a timeout occurs. diff --git a/tools/testing/vsock/control.h b/tools/testing/vsock/control.h index 51814b4f9ac1..c1f77fdb2c7a 100644 --- a/tools/testing/vsock/control.h +++ b/tools/testing/vsock/control.h @@ -9,7 +9,9 @@ void control_init(const char *control_host, const char *control_port, void control_cleanup(void); void control_writeln(const char *str); char *control_readln(void); +unsigned long control_readulong(void); void control_expectln(const char *str); bool control_cmpln(char *line, const char *str, bool fail); +void control_writeulong(unsigned long value); #endif /* CONTROL_H */ diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 2acbb7703c6a..01b636d3039a 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -395,3 +395,16 @@ void skip_test(struct test_case *test_cases, size_t test_cases_len, test_cases[test_id].skip = true; } + +unsigned long hash_djb2(const void *data, size_t len) +{ + unsigned long hash = 5381; + int i = 0; + + while (i < len) { + hash = ((hash << 5) + hash) + ((unsigned char *)data)[i]; + i++; + } + + return hash; +} diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index a3375ad2fb7f..fb99208a95ea 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -49,4 +49,5 @@ void run_tests(const struct test_case *test_cases, void list_tests(const struct test_case *test_cases); void skip_test(struct test_case *test_cases, size_t test_cases_len, const char *test_id_str); +unsigned long hash_djb2(const void *data, size_t len); #endif /* UTIL_H */ diff --git a/tools/testing/vsock/vsock_perf.c b/tools/testing/vsock/vsock_perf.c new file mode 100644 index 000000000000..a72520338f84 --- /dev/null +++ b/tools/testing/vsock/vsock_perf.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vsock_perf - benchmark utility for vsock. + * + * Copyright (C) 2022 SberDevices. + * + * Author: Arseniy Krasnov <AVKrasnov@sberdevices.ru> + */ +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <time.h> +#include <stdint.h> +#include <poll.h> +#include <sys/socket.h> +#include <linux/vm_sockets.h> + +#define DEFAULT_BUF_SIZE_BYTES (128 * 1024) +#define DEFAULT_TO_SEND_BYTES (64 * 1024) +#define DEFAULT_VSOCK_BUF_BYTES (256 * 1024) +#define DEFAULT_RCVLOWAT_BYTES 1 +#define DEFAULT_PORT 1234 + +#define BYTES_PER_GB (1024 * 1024 * 1024ULL) +#define NSEC_PER_SEC (1000000000ULL) + +static unsigned int port = DEFAULT_PORT; +static unsigned long buf_size_bytes = DEFAULT_BUF_SIZE_BYTES; +static unsigned long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES; + +static void error(const char *s) +{ + perror(s); + exit(EXIT_FAILURE); +} + +static time_t current_nsec(void) +{ + struct timespec ts; + + if (clock_gettime(CLOCK_REALTIME, &ts)) + error("clock_gettime"); + + return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec; +} + +/* From lib/cmdline.c. */ +static unsigned long memparse(const char *ptr) +{ + char *endptr; + + unsigned long long ret = strtoull(ptr, &endptr, 0); + + switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + case 'P': + case 'p': + ret <<= 10; + case 'T': + case 't': + ret <<= 10; + case 'G': + case 'g': + ret <<= 10; + case 'M': + case 'm': + ret <<= 10; + case 'K': + case 'k': + ret <<= 10; + endptr++; + default: + break; + } + + return ret; +} + +static void vsock_increase_buf_size(int fd) +{ + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, + &vsock_buf_bytes, sizeof(vsock_buf_bytes))) + error("setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); + + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + &vsock_buf_bytes, sizeof(vsock_buf_bytes))) + error("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); +} + +static int vsock_connect(unsigned int cid, unsigned int port) +{ + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = port, + .svm_cid = cid, + }, + }; + int fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + if (fd < 0) { + perror("socket"); + return -1; + } + + if (connect(fd, &addr.sa, sizeof(addr.svm)) < 0) { + perror("connect"); + close(fd); + return -1; + } + + return fd; +} + +static float get_gbps(unsigned long bits, time_t ns_delta) +{ + return ((float)bits / 1000000000ULL) / + ((float)ns_delta / NSEC_PER_SEC); +} + +static void run_receiver(unsigned long rcvlowat_bytes) +{ + unsigned int read_cnt; + time_t rx_begin_ns; + time_t in_read_ns; + size_t total_recv; + int client_fd; + char *data; + int fd; + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } addr = { + .svm = { + .svm_family = AF_VSOCK, + .svm_port = port, + .svm_cid = VMADDR_CID_ANY, + }, + }; + union { + struct sockaddr sa; + struct sockaddr_vm svm; + } clientaddr; + + socklen_t clientaddr_len = sizeof(clientaddr.svm); + + printf("Run as receiver\n"); + printf("Listen port %u\n", port); + printf("RX buffer %lu bytes\n", buf_size_bytes); + printf("vsock buffer %lu bytes\n", vsock_buf_bytes); + printf("SO_RCVLOWAT %lu bytes\n", rcvlowat_bytes); + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + if (fd < 0) + error("socket"); + + if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) + error("bind"); + + if (listen(fd, 1) < 0) + error("listen"); + + client_fd = accept(fd, &clientaddr.sa, &clientaddr_len); + + if (client_fd < 0) + error("accept"); + + vsock_increase_buf_size(client_fd); + + if (setsockopt(client_fd, SOL_SOCKET, SO_RCVLOWAT, + &rcvlowat_bytes, + sizeof(rcvlowat_bytes))) + error("setsockopt(SO_RCVLOWAT)"); + + data = malloc(buf_size_bytes); + + if (!data) { + fprintf(stderr, "'malloc()' failed\n"); + exit(EXIT_FAILURE); + } + + read_cnt = 0; + in_read_ns = 0; + total_recv = 0; + rx_begin_ns = current_nsec(); + + while (1) { + struct pollfd fds = { 0 }; + + fds.fd = client_fd; + fds.events = POLLIN | POLLERR | + POLLHUP | POLLRDHUP; + + if (poll(&fds, 1, -1) < 0) + error("poll"); + + if (fds.revents & POLLERR) { + fprintf(stderr, "'poll()' error\n"); + exit(EXIT_FAILURE); + } + + if (fds.revents & POLLIN) { + ssize_t bytes_read; + time_t t; + + t = current_nsec(); + bytes_read = read(fds.fd, data, buf_size_bytes); + in_read_ns += (current_nsec() - t); + read_cnt++; + + if (!bytes_read) + break; + + if (bytes_read < 0) { + perror("read"); + exit(EXIT_FAILURE); + } + + total_recv += bytes_read; + } + + if (fds.revents & (POLLHUP | POLLRDHUP)) + break; + } + + printf("total bytes received: %zu\n", total_recv); + printf("rx performance: %f Gbits/s\n", + get_gbps(total_recv * 8, current_nsec() - rx_begin_ns)); + printf("total time in 'read()': %f sec\n", (float)in_read_ns / NSEC_PER_SEC); + printf("average time in 'read()': %f ns\n", (float)in_read_ns / read_cnt); + printf("POLLIN wakeups: %i\n", read_cnt); + + free(data); + close(client_fd); + close(fd); +} + +static void run_sender(int peer_cid, unsigned long to_send_bytes) +{ + time_t tx_begin_ns; + time_t tx_total_ns; + size_t total_send; + void *data; + int fd; + + printf("Run as sender\n"); + printf("Connect to %i:%u\n", peer_cid, port); + printf("Send %lu bytes\n", to_send_bytes); + printf("TX buffer %lu bytes\n", buf_size_bytes); + + fd = vsock_connect(peer_cid, port); + + if (fd < 0) + exit(EXIT_FAILURE); + + data = malloc(buf_size_bytes); + + if (!data) { + fprintf(stderr, "'malloc()' failed\n"); + exit(EXIT_FAILURE); + } + + memset(data, 0, buf_size_bytes); + total_send = 0; + tx_begin_ns = current_nsec(); + + while (total_send < to_send_bytes) { + ssize_t sent; + + sent = write(fd, data, buf_size_bytes); + + if (sent <= 0) + error("write"); + + total_send += sent; + } + + tx_total_ns = current_nsec() - tx_begin_ns; + + printf("total bytes sent: %zu\n", total_send); + printf("tx performance: %f Gbits/s\n", + get_gbps(total_send * 8, tx_total_ns)); + printf("total time in 'write()': %f sec\n", + (float)tx_total_ns / NSEC_PER_SEC); + + close(fd); + free(data); +} + +static const char optstring[] = ""; +static const struct option longopts[] = { + { + .name = "help", + .has_arg = no_argument, + .val = 'H', + }, + { + .name = "sender", + .has_arg = required_argument, + .val = 'S', + }, + { + .name = "port", + .has_arg = required_argument, + .val = 'P', + }, + { + .name = "bytes", + .has_arg = required_argument, + .val = 'M', + }, + { + .name = "buf-size", + .has_arg = required_argument, + .val = 'B', + }, + { + .name = "vsk-size", + .has_arg = required_argument, + .val = 'V', + }, + { + .name = "rcvlowat", + .has_arg = required_argument, + .val = 'R', + }, + {}, +}; + +static void usage(void) +{ + printf("Usage: ./vsock_perf [--help] [options]\n" + "\n" + "This is benchmarking utility, to test vsock performance.\n" + "It runs in two modes: sender or receiver. In sender mode, it\n" + "connects to the specified CID and starts data transmission.\n" + "\n" + "Options:\n" + " --help This message\n" + " --sender <cid> Sender mode (receiver default)\n" + " <cid> of the receiver to connect to\n" + " --port <port> Port (default %d)\n" + " --bytes <bytes>KMG Bytes to send (default %d)\n" + " --buf-size <bytes>KMG Data buffer size (default %d). In sender mode\n" + " it is the buffer size, passed to 'write()'. In\n" + " receiver mode it is the buffer size passed to 'read()'.\n" + " --vsk-size <bytes>KMG Socket buffer size (default %d)\n" + " --rcvlowat <bytes>KMG SO_RCVLOWAT value (default %d)\n" + "\n", DEFAULT_PORT, DEFAULT_TO_SEND_BYTES, + DEFAULT_BUF_SIZE_BYTES, DEFAULT_VSOCK_BUF_BYTES, + DEFAULT_RCVLOWAT_BYTES); + exit(EXIT_FAILURE); +} + +static long strtolx(const char *arg) +{ + long value; + char *end; + + value = strtol(arg, &end, 10); + + if (end != arg + strlen(arg)) + usage(); + + return value; +} + +int main(int argc, char **argv) +{ + unsigned long to_send_bytes = DEFAULT_TO_SEND_BYTES; + unsigned long rcvlowat_bytes = DEFAULT_RCVLOWAT_BYTES; + int peer_cid = -1; + bool sender = false; + + while (1) { + int opt = getopt_long(argc, argv, optstring, longopts, NULL); + + if (opt == -1) + break; + + switch (opt) { + case 'V': /* Peer buffer size. */ + vsock_buf_bytes = memparse(optarg); + break; + case 'R': /* SO_RCVLOWAT value. */ + rcvlowat_bytes = memparse(optarg); + break; + case 'P': /* Port to connect to. */ + port = strtolx(optarg); + break; + case 'M': /* Bytes to send. */ + to_send_bytes = memparse(optarg); + break; + case 'B': /* Size of rx/tx buffer. */ + buf_size_bytes = memparse(optarg); + break; + case 'S': /* Sender mode. CID to connect to. */ + peer_cid = strtolx(optarg); + sender = true; + break; + case 'H': /* Help. */ + usage(); + break; + default: + usage(); + } + } + + if (!sender) + run_receiver(rcvlowat_bytes); + else + run_sender(peer_cid, to_send_bytes); + + return 0; +} diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index bb6d691cb30d..67e9f9df3a8c 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -284,10 +284,14 @@ static void test_stream_msg_peek_server(const struct test_opts *opts) close(fd); } -#define MESSAGES_CNT 7 -#define MSG_EOR_IDX (MESSAGES_CNT / 2) +#define SOCK_BUF_SIZE (2 * 1024 * 1024) +#define MAX_MSG_SIZE (32 * 1024) + static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) { + unsigned long curr_hash; + int page_size; + int msg_count; int fd; fd = vsock_seqpacket_connect(opts->peer_cid, 1234); @@ -296,18 +300,79 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) exit(EXIT_FAILURE); } - /* Send several messages, one with MSG_EOR flag */ - for (int i = 0; i < MESSAGES_CNT; i++) - send_byte(fd, 1, (i == MSG_EOR_IDX) ? MSG_EOR : 0); + /* Wait, until receiver sets buffer size. */ + control_expectln("SRVREADY"); + + curr_hash = 0; + page_size = getpagesize(); + msg_count = SOCK_BUF_SIZE / MAX_MSG_SIZE; + + for (int i = 0; i < msg_count; i++) { + ssize_t send_size; + size_t buf_size; + int flags; + void *buf; + + /* Use "small" buffers and "big" buffers. */ + if (i & 1) + buf_size = page_size + + (rand() % (MAX_MSG_SIZE - page_size)); + else + buf_size = 1 + (rand() % page_size); + + buf = malloc(buf_size); + + if (!buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + memset(buf, rand() & 0xff, buf_size); + /* Set at least one MSG_EOR + some random. */ + if (i == (msg_count / 2) || (rand() & 1)) { + flags = MSG_EOR; + curr_hash++; + } else { + flags = 0; + } + + send_size = send(fd, buf, buf_size, flags); + + if (send_size < 0) { + perror("send"); + exit(EXIT_FAILURE); + } + + if (send_size != buf_size) { + fprintf(stderr, "Invalid send size\n"); + exit(EXIT_FAILURE); + } + + /* + * Hash sum is computed at both client and server in + * the same way: + * H += hash('message data') + * Such hash "controls" both data integrity and message + * bounds. After data exchange, both sums are compared + * using control socket, and if message bounds wasn't + * broken - two values must be equal. + */ + curr_hash += hash_djb2(buf, buf_size); + free(buf); + } control_writeln("SENDDONE"); + control_writeulong(curr_hash); close(fd); } static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) { + unsigned long sock_buf_size; + unsigned long remote_hash; + unsigned long curr_hash; int fd; - char buf[16]; + char buf[MAX_MSG_SIZE]; struct msghdr msg = {0}; struct iovec iov = {0}; @@ -317,25 +382,57 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) exit(EXIT_FAILURE); } + sock_buf_size = SOCK_BUF_SIZE; + + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, + &sock_buf_size, sizeof(sock_buf_size))) { + perror("setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); + exit(EXIT_FAILURE); + } + + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + &sock_buf_size, sizeof(sock_buf_size))) { + perror("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); + exit(EXIT_FAILURE); + } + + /* Ready to receive data. */ + control_writeln("SRVREADY"); + /* Wait, until peer sends whole data. */ control_expectln("SENDDONE"); iov.iov_base = buf; iov.iov_len = sizeof(buf); msg.msg_iov = &iov; msg.msg_iovlen = 1; - for (int i = 0; i < MESSAGES_CNT; i++) { - if (recvmsg(fd, &msg, 0) != 1) { - perror("message bound violated"); - exit(EXIT_FAILURE); - } + curr_hash = 0; + + while (1) { + ssize_t recv_size; - if ((i == MSG_EOR_IDX) ^ !!(msg.msg_flags & MSG_EOR)) { - perror("MSG_EOR"); + recv_size = recvmsg(fd, &msg, 0); + + if (!recv_size) + break; + + if (recv_size < 0) { + perror("recvmsg"); exit(EXIT_FAILURE); } + + if (msg.msg_flags & MSG_EOR) + curr_hash++; + + curr_hash += hash_djb2(msg.msg_iov[0].iov_base, recv_size); } close(fd); + remote_hash = control_readulong(); + + if (curr_hash != remote_hash) { + fprintf(stderr, "Message bounds broken\n"); + exit(EXIT_FAILURE); + } } #define MESSAGE_TRUNC_SZ 32 @@ -427,7 +524,7 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts) tv.tv_usec = 0; if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (void *)&tv, sizeof(tv)) == -1) { - perror("setsockopt 'SO_RCVTIMEO'"); + perror("setsockopt(SO_RCVTIMEO)"); exit(EXIT_FAILURE); } @@ -472,6 +569,70 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts) close(fd); } +static void test_seqpacket_bigmsg_client(const struct test_opts *opts) +{ + unsigned long sock_buf_size; + ssize_t send_size; + socklen_t len; + void *data; + int fd; + + len = sizeof(sock_buf_size); + + fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (getsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + &sock_buf_size, &len)) { + perror("getsockopt"); + exit(EXIT_FAILURE); + } + + sock_buf_size++; + + data = malloc(sock_buf_size); + if (!data) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + send_size = send(fd, data, sock_buf_size, 0); + if (send_size != -1) { + fprintf(stderr, "expected 'send(2)' failure, got %zi\n", + send_size); + exit(EXIT_FAILURE); + } + + if (errno != EMSGSIZE) { + fprintf(stderr, "expected EMSGSIZE in 'errno', got %i\n", + errno); + exit(EXIT_FAILURE); + } + + control_writeln("CLISENT"); + + free(data); + close(fd); +} + +static void test_seqpacket_bigmsg_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("CLISENT"); + + close(fd); +} + #define BUF_PATTERN_1 'a' #define BUF_PATTERN_2 'b' @@ -644,7 +805,7 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, &lowat_val, sizeof(lowat_val))) { - perror("setsockopt"); + perror("setsockopt(SO_RCVLOWAT)"); exit(EXIT_FAILURE); } @@ -754,6 +915,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_poll_rcvlowat_client, .run_server = test_stream_poll_rcvlowat_server, }, + { + .name = "SOCK_SEQPACKET big message", + .run_client = test_seqpacket_bigmsg_client, + .run_server = test_seqpacket_bigmsg_server, + }, {}, }; @@ -837,6 +1003,7 @@ int main(int argc, char **argv) .peer_cid = VMADDR_CID_ANY, }; + srand(time(NULL)); init_signals(); for (;;) { |