From 249d2e6f597b72769e158bfa0f64051de645ce3d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 1 Aug 2022 15:02:37 -0600 Subject: dt-bindings: dma: arm,pl330: Add missing 'iommus' property The pl330 can be behind an IOMMU which is the case for Arm Juno board. Add the 'iommus' property allowing for 1 IOMMU entry per channel for writes and 1 IOMMU entry for reads. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20220801210237.1501488-1-robh@kernel.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/arm,pl330.yaml | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/dma/arm,pl330.yaml b/Documentation/devicetree/bindings/dma/arm,pl330.yaml index 2bec69b308f8..4a3dd6f5309b 100644 --- a/Documentation/devicetree/bindings/dma/arm,pl330.yaml +++ b/Documentation/devicetree/bindings/dma/arm,pl330.yaml @@ -55,6 +55,12 @@ properties: dma-coherent: true + iommus: + minItems: 1 + maxItems: 9 + description: Up to 1 IOMMU entry per DMA channel for writes and 1 + IOMMU entry for reads. + power-domains: maxItems: 1 -- cgit v1.2.3 From abd7bb690bcf662afc8abe74fd3c10f7ad1c5b19 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 12 Aug 2022 10:27:19 +0200 Subject: dt-bindings: dmaengine: qcom: gpi: add compatible for SM6350 Document the compatible for GPI DMA controller on SM6350 SoC. Signed-off-by: Luca Weiss Reviewed-by: Bjorn Andersson Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220812082721.1125759-2-luca.weiss@fairphone.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index 7d2fc4eb5530..eabf8a76d3a0 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -21,6 +21,7 @@ properties: enum: - qcom,sc7280-gpi-dma - qcom,sdm845-gpi-dma + - qcom,sm6350-gpi-dma - qcom,sm8150-gpi-dma - qcom,sm8250-gpi-dma - qcom,sm8350-gpi-dma -- cgit v1.2.3 From c6c1a365d6115f159572106b0bfd9796b0bffd27 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 29 Aug 2022 17:46:43 +0200 Subject: docs: arm: stm32: introduce STM32 DMA-MDMA chaining feature STM32 DMA-MDMA chaining feature is available on STM32 SoCs which embed STM32 DMAMUX, DMA and MDMA controllers. It is the case on STM32MP1 SoCs but also on STM32H7 SoCs. But focus is on STM32MP1 SoCs, using DDR. This documentation aims to explain how to use STM32 DMA-MDMA chaining feature in drivers of STM32 peripheral having request lines on STM32 DMA. Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20220829154646.29867-4-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- Documentation/arm/index.rst | 1 + .../arm/stm32/stm32-dma-mdma-chaining.rst | 415 +++++++++++++++++++++ 2 files changed, 416 insertions(+) create mode 100644 Documentation/arm/stm32/stm32-dma-mdma-chaining.rst (limited to 'Documentation') diff --git a/Documentation/arm/index.rst b/Documentation/arm/index.rst index 495ada7915e1..8c636d4a061f 100644 --- a/Documentation/arm/index.rst +++ b/Documentation/arm/index.rst @@ -59,6 +59,7 @@ SoC-specific documents stm32/stm32f429-overview stm32/stm32mp13-overview stm32/stm32mp157-overview + stm32/stm32-dma-mdma-chaining sunxi diff --git a/Documentation/arm/stm32/stm32-dma-mdma-chaining.rst b/Documentation/arm/stm32/stm32-dma-mdma-chaining.rst new file mode 100644 index 000000000000..2945e0e33104 --- /dev/null +++ b/Documentation/arm/stm32/stm32-dma-mdma-chaining.rst @@ -0,0 +1,415 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================= +STM32 DMA-MDMA chaining +======================= + + +Introduction +------------ + + This document describes the STM32 DMA-MDMA chaining feature. But before going + further, let's introduce the peripherals involved. + + To offload data transfers from the CPU, STM32 microprocessors (MPUs) embed + direct memory access controllers (DMA). + + STM32MP1 SoCs embed both STM32 DMA and STM32 MDMA controllers. STM32 DMA + request routing capabilities are enhanced by a DMA request multiplexer + (STM32 DMAMUX). + + **STM32 DMAMUX** + + STM32 DMAMUX routes any DMA request from a given peripheral to any STM32 DMA + controller (STM32MP1 counts two STM32 DMA controllers) channels. + + **STM32 DMA** + + STM32 DMA is mainly used to implement central data buffer storage (usually in + the system SRAM) for different peripheral. It can access external RAMs but + without the ability to generate convenient burst transfer ensuring the best + load of the AXI. + + **STM32 MDMA** + + STM32 MDMA (Master DMA) is mainly used to manage direct data transfers between + RAM data buffers without CPU intervention. It can also be used in a + hierarchical structure that uses STM32 DMA as first level data buffer + interfaces for AHB peripherals, while the STM32 MDMA acts as a second level + DMA with better performance. As a AXI/AHB master, STM32 MDMA can take control + of the AXI/AHB bus. + + +Principles +---------- + + STM32 DMA-MDMA chaining feature relies on the strengths of STM32 DMA and + STM32 MDMA controllers. + + STM32 DMA has a circular Double Buffer Mode (DBM). At each end of transaction + (when DMA data counter - DMA_SxNDTR - reaches 0), the memory pointers + (configured with DMA_SxSM0AR and DMA_SxM1AR) are swapped and the DMA data + counter is automatically reloaded. This allows the SW or the STM32 MDMA to + process one memory area while the second memory area is being filled/used by + the STM32 DMA transfer. + + With STM32 MDMA linked-list mode, a single request initiates the data array + (collection of nodes) to be transferred until the linked-list pointer for the + channel is null. The channel transfer complete of the last node is the end of + transfer, unless first and last nodes are linked to each other, in such a + case, the linked-list loops on to create a circular MDMA transfer. + + STM32 MDMA has direct connections with STM32 DMA. This enables autonomous + communication and synchronization between peripherals, thus saving CPU + resources and bus congestion. Transfer Complete signal of STM32 DMA channel + can triggers STM32 MDMA transfer. STM32 MDMA can clear the request generated + by the STM32 DMA by writing to its Interrupt Clear register (whose address is + stored in MDMA_CxMAR, and bit mask in MDMA_CxMDR). + + .. table:: STM32 MDMA interconnect table with STM32 DMA + + +--------------+----------------+-----------+------------+ + | STM32 DMAMUX | STM32 DMA | STM32 DMA | STM32 MDMA | + | channels | channels | Transfer | request | + | | | complete | | + | | | signal | | + +==============+================+===========+============+ + | Channel *0* | DMA1 channel 0 | dma1_tcf0 | *0x00* | + +--------------+----------------+-----------+------------+ + | Channel *1* | DMA1 channel 1 | dma1_tcf1 | *0x01* | + +--------------+----------------+-----------+------------+ + | Channel *2* | DMA1 channel 2 | dma1_tcf2 | *0x02* | + +--------------+----------------+-----------+------------+ + | Channel *3* | DMA1 channel 3 | dma1_tcf3 | *0x03* | + +--------------+----------------+-----------+------------+ + | Channel *4* | DMA1 channel 4 | dma1_tcf4 | *0x04* | + +--------------+----------------+-----------+------------+ + | Channel *5* | DMA1 channel 5 | dma1_tcf5 | *0x05* | + +--------------+----------------+-----------+------------+ + | Channel *6* | DMA1 channel 6 | dma1_tcf6 | *0x06* | + +--------------+----------------+-----------+------------+ + | Channel *7* | DMA1 channel 7 | dma1_tcf7 | *0x07* | + +--------------+----------------+-----------+------------+ + | Channel *8* | DMA2 channel 0 | dma2_tcf0 | *0x08* | + +--------------+----------------+-----------+------------+ + | Channel *9* | DMA2 channel 1 | dma2_tcf1 | *0x09* | + +--------------+----------------+-----------+------------+ + | Channel *10* | DMA2 channel 2 | dma2_tcf2 | *0x0A* | + +--------------+----------------+-----------+------------+ + | Channel *11* | DMA2 channel 3 | dma2_tcf3 | *0x0B* | + +--------------+----------------+-----------+------------+ + | Channel *12* | DMA2 channel 4 | dma2_tcf4 | *0x0C* | + +--------------+----------------+-----------+------------+ + | Channel *13* | DMA2 channel 5 | dma2_tcf5 | *0x0D* | + +--------------+----------------+-----------+------------+ + | Channel *14* | DMA2 channel 6 | dma2_tcf6 | *0x0E* | + +--------------+----------------+-----------+------------+ + | Channel *15* | DMA2 channel 7 | dma2_tcf7 | *0x0F* | + +--------------+----------------+-----------+------------+ + + STM32 DMA-MDMA chaining feature then uses a SRAM buffer. STM32MP1 SoCs embed + three fast access static internal RAMs of various size, used for data storage. + Due to STM32 DMA legacy (within microcontrollers), STM32 DMA performances are + bad with DDR, while they are optimal with SRAM. Hence the SRAM buffer used + between STM32 DMA and STM32 MDMA. This buffer is split in two equal periods + and STM32 DMA uses one period while STM32 MDMA uses the other period + simultaneously. + :: + + dma[1:2]-tcf[0:7] + .----------------. + ____________ ' _________ V____________ + | STM32 DMA | / __|>_ \ | STM32 MDMA | + |------------| | / \ | |------------| + | DMA_SxM0AR |<=>| | SRAM | |<=>| []-[]...[] | + | DMA_SxM1AR | | \_____/ | | | + |____________| \___<|____/ |____________| + + STM32 DMA-MDMA chaining uses (struct dma_slave_config).peripheral_config to + exchange the parameters needed to configure MDMA. These parameters are + gathered into a u32 array with three values: + + * the STM32 MDMA request (which is actually the DMAMUX channel ID), + * the address of the STM32 DMA register to clear the Transfer Complete + interrupt flag, + * the mask of the Transfer Complete interrupt flag of the STM32 DMA channel. + +Device Tree updates for STM32 DMA-MDMA chaining support +------------------------------------------------------- + + **1. Allocate a SRAM buffer** + + SRAM device tree node is defined in SoC device tree. You can refer to it in + your board device tree to define your SRAM pool. + :: + + &sram { + my_foo_device_dma_pool: dma-sram@0 { + reg = <0x0 0x1000>; + }; + }; + + Be careful of the start index, in case there are other SRAM consumers. + Define your pool size strategically: to optimise chaining, the idea is that + STM32 DMA and STM32 MDMA can work simultaneously, on each buffer of the + SRAM. + If the SRAM period is greater than the expected DMA transfer, then STM32 DMA + and STM32 MDMA will work sequentially instead of simultaneously. It is not a + functional issue but it is not optimal. + + Don't forget to refer to your SRAM pool in your device node. You need to + define a new property. + :: + + &my_foo_device { + ... + my_dma_pool = &my_foo_device_dma_pool; + }; + + Then get this SRAM pool in your foo driver and allocate your SRAM buffer. + + **2. Allocate a STM32 DMA channel and a STM32 MDMA channel** + + You need to define an extra channel in your device tree node, in addition to + the one you should already have for "classic" DMA operation. + + This new channel must be taken from STM32 MDMA channels, so, the phandle of + the DMA controller to use is the MDMA controller's one. + :: + + &my_foo_device { + [...] + my_dma_pool = &my_foo_device_dma_pool; + dmas = <&dmamux1 ...>, // STM32 DMA channel + <&mdma1 0 0x3 0x1200000a 0 0>; // + STM32 MDMA channel + }; + + Concerning STM32 MDMA bindings: + + 1. The request line number : whatever the value here, it will be overwritten + by MDMA driver with the STM32 DMAMUX channel ID passed through + (struct dma_slave_config).peripheral_config + + 2. The priority level : choose Very High (0x3) so that your channel will + take priority other the other during request arbitration + + 3. A 32bit mask specifying the DMA channel configuration : source and + destination address increment, block transfer with 128 bytes per single + transfer + + 4. The 32bit value specifying the register to be used to acknowledge the + request: it will be overwritten by MDMA driver, with the DMA channel + interrupt flag clear register address passed through + (struct dma_slave_config).peripheral_config + + 5. The 32bit mask specifying the value to be written to acknowledge the + request: it will be overwritten by MDMA driver, with the DMA channel + Transfer Complete flag passed through + (struct dma_slave_config).peripheral_config + +Driver updates for STM32 DMA-MDMA chaining support in foo driver +---------------------------------------------------------------- + + **0. (optional) Refactor the original sg_table if dmaengine_prep_slave_sg()** + + In case of dmaengine_prep_slave_sg(), the original sg_table can't be used as + is. Two new sg_tables must be created from the original one. One for + STM32 DMA transfer (where memory address targets now the SRAM buffer instead + of DDR buffer) and one for STM32 MDMA transfer (where memory address targets + the DDR buffer). + + The new sg_list items must fit SRAM period length. Here is an example for + DMA_DEV_TO_MEM: + :: + + /* + * Assuming sgl and nents, respectively the initial scatterlist and its + * length. + * Assuming sram_dma_buf and sram_period, respectively the memory + * allocated from the pool for DMA usage, and the length of the period, + * which is half of the sram_buf size. + */ + struct sg_table new_dma_sgt, new_mdma_sgt; + struct scatterlist *s, *_sgl; + dma_addr_t ddr_dma_buf; + u32 new_nents = 0, len; + int i; + + /* Count the number of entries needed */ + for_each_sg(sgl, s, nents, i) + if (sg_dma_len(s) > sram_period) + new_nents += DIV_ROUND_UP(sg_dma_len(s), sram_period); + else + new_nents++; + + /* Create sg table for STM32 DMA channel */ + ret = sg_alloc_table(&new_dma_sgt, new_nents, GFP_ATOMIC); + if (ret) + dev_err(dev, "DMA sg table alloc failed\n"); + + for_each_sg(new_dma_sgt.sgl, s, new_dma_sgt.nents, i) { + _sgl = sgl; + sg_dma_len(s) = min(sg_dma_len(_sgl), sram_period); + /* Targets the beginning = first half of the sram_buf */ + s->dma_address = sram_buf; + /* + * Targets the second half of the sram_buf + * for odd indexes of the item of the sg_list + */ + if (i & 1) + s->dma_address += sram_period; + } + + /* Create sg table for STM32 MDMA channel */ + ret = sg_alloc_table(&new_mdma_sgt, new_nents, GFP_ATOMIC); + if (ret) + dev_err(dev, "MDMA sg_table alloc failed\n"); + + _sgl = sgl; + len = sg_dma_len(sgl); + ddr_dma_buf = sg_dma_address(sgl); + for_each_sg(mdma_sgt.sgl, s, mdma_sgt.nents, i) { + size_t bytes = min_t(size_t, len, sram_period); + + sg_dma_len(s) = bytes; + sg_dma_address(s) = ddr_dma_buf; + len -= bytes; + + if (!len && sg_next(_sgl)) { + _sgl = sg_next(_sgl); + len = sg_dma_len(_sgl); + ddr_dma_buf = sg_dma_address(_sgl); + } else { + ddr_dma_buf += bytes; + } + } + + Don't forget to release these new sg_tables after getting the descriptors + with dmaengine_prep_slave_sg(). + + **1. Set controller specific parameters** + + First, use dmaengine_slave_config() with a struct dma_slave_config to + configure STM32 DMA channel. You just have to take care of DMA addresses, + the memory address (depending on the transfer direction) must point on your + SRAM buffer, and set (struct dma_slave_config).peripheral_size != 0. + + STM32 DMA driver will check (struct dma_slave_config).peripheral_size to + determine if chaining is being used or not. If it is used, then STM32 DMA + driver fills (struct dma_slave_config).peripheral_config with an array of + three u32 : the first one containing STM32 DMAMUX channel ID, the second one + the channel interrupt flag clear register address, and the third one the + channel Transfer Complete flag mask. + + Then, use dmaengine_slave_config with another struct dma_slave_config to + configure STM32 MDMA channel. Take care of DMA addresses, the device address + (depending on the transfer direction) must point on your SRAM buffer, and + the memory address must point to the buffer originally used for "classic" + DMA operation. Use the previous (struct dma_slave_config).peripheral_size + and .peripheral_config that have been updated by STM32 DMA driver, to set + (struct dma_slave_config).peripheral_size and .peripheral_config of the + struct dma_slave_config to configure STM32 MDMA channel. + :: + + struct dma_slave_config dma_conf; + struct dma_slave_config mdma_conf; + + memset(&dma_conf, 0, sizeof(dma_conf)); + [...] + config.direction = DMA_DEV_TO_MEM; + config.dst_addr = sram_dma_buf; // SRAM buffer + config.peripheral_size = 1; // peripheral_size != 0 => chaining + + dmaengine_slave_config(dma_chan, &dma_config); + + memset(&mdma_conf, 0, sizeof(mdma_conf)); + config.direction = DMA_DEV_TO_MEM; + mdma_conf.src_addr = sram_dma_buf; // SRAM buffer + mdma_conf.dst_addr = rx_dma_buf; // original memory buffer + mdma_conf.peripheral_size = dma_conf.peripheral_size; // <- dma_conf + mdma_conf.peripheral_config = dma_config.peripheral_config; // <- dma_conf + + dmaengine_slave_config(mdma_chan, &mdma_conf); + + **2. Get a descriptor for STM32 DMA channel transaction** + + In the same way you get your descriptor for your "classic" DMA operation, + you just have to replace the original sg_list (in case of + dmaengine_prep_slave_sg()) with the new sg_list using SRAM buffer, or to + replace the original buffer address, length and period (in case of + dmaengine_prep_dma_cyclic()) with the new SRAM buffer. + + **3. Get a descriptor for STM32 MDMA channel transaction** + + If you previously get descriptor (for STM32 DMA) with + + * dmaengine_prep_slave_sg(), then use dmaengine_prep_slave_sg() for + STM32 MDMA; + * dmaengine_prep_dma_cyclic(), then use dmaengine_prep_dma_cyclic() for + STM32 MDMA. + + Use the new sg_list using SRAM buffer (in case of dmaengine_prep_slave_sg()) + or, depending on the transfer direction, either the original DDR buffer (in + case of DMA_DEV_TO_MEM) or the SRAM buffer (in case of DMA_MEM_TO_DEV), the + source address being previously set with dmaengine_slave_config(). + + **4. Submit both transactions** + + Before submitting your transactions, you may need to define on which + descriptor you want a callback to be called at the end of the transfer + (dmaengine_prep_slave_sg()) or the period (dmaengine_prep_dma_cyclic()). + Depending on the direction, set the callback on the descriptor that finishes + the overal transfer: + + * DMA_DEV_TO_MEM: set the callback on the "MDMA" descriptor + * DMA_MEM_TO_DEV: set the callback on the "DMA" descriptor + + Then, submit the descriptors whatever the order, with dmaengine_tx_submit(). + + **5. Issue pending requests (and wait for callback notification)** + + As STM32 MDMA channel transfer is triggered by STM32 DMA, you must issue + STM32 MDMA channel before STM32 DMA channel. + + If any, your callback will be called to warn you about the end of the overal + transfer or the period completion. + + Don't forget to terminate both channels. STM32 DMA channel is configured in + cyclic Double-Buffer mode so it won't be disabled by HW, you need to terminate + it. STM32 MDMA channel will be stopped by HW in case of sg transfer, but not + in case of cyclic transfer. You can terminate it whatever the kind of transfer. + + **STM32 DMA-MDMA chaining DMA_MEM_TO_DEV special case** + + STM32 DMA-MDMA chaining in DMA_MEM_TO_DEV is a special case. Indeed, the + STM32 MDMA feeds the SRAM buffer with the DDR data, and the STM32 DMA reads + data from SRAM buffer. So some data (the first period) have to be copied in + SRAM buffer when the STM32 DMA starts to read. + + A trick could be pausing the STM32 DMA channel (that will raise a Transfer + Complete signal, triggering the STM32 MDMA channel), but the first data read + by the STM32 DMA could be "wrong". The proper way is to prepare the first SRAM + period with dmaengine_prep_dma_memcpy(). Then this first period should be + "removed" from the sg or the cyclic transfer. + + Due to this complexity, rather use the STM32 DMA-MDMA chaining for + DMA_DEV_TO_MEM and keep the "classic" DMA usage for DMA_MEM_TO_DEV, unless + you're not afraid. + +Resources +--------- + + Application note, datasheet and reference manual are available on ST website + (STM32MP1_). + + Dedicated focus on three application notes (AN5224_, AN4031_ & AN5001_) + dealing with STM32 DMAMUX, STM32 DMA and STM32 MDMA. + +.. _STM32MP1: https://www.st.com/en/microcontrollers-microprocessors/stm32mp1-series.html +.. _AN5224: https://www.st.com/resource/en/application_note/an5224-stm32-dmamux-the-dma-request-router-stmicroelectronics.pdf +.. _AN4031: https://www.st.com/resource/en/application_note/dm00046011-using-the-stm32f2-stm32f4-and-stm32f7-series-dma-controller-stmicroelectronics.pdf +.. _AN5001: https://www.st.com/resource/en/application_note/an5001-stm32cube-expansion-package-for-stm32h7-series-mdma-stmicroelectronics.pdf + +:Authors: + +- Amelie Delaunay \ No newline at end of file -- cgit v1.2.3 From 493c1141f791a0a8af5d1745bdf9f159f564ca3f Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 29 Jul 2022 12:44:33 +0200 Subject: dt-bindings: dma: mediatek,uart-dma: Add binding for MT6795 SoC Add mediatek,mt6795-uart-dma to the compatibles list to support the MT6795 Helio X10 SoC's UART APDMA. Signed-off-by: AngeloGioacchino Del Regno Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220729104441.39177-2-angelogioacchino.delregno@collabora.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml b/Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml index 19ea8dcbcbce..9ab4d81ead35 100644 --- a/Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml +++ b/Documentation/devicetree/bindings/dma/mediatek,uart-dma.yaml @@ -22,6 +22,7 @@ properties: - items: - enum: - mediatek,mt2712-uart-dma + - mediatek,mt6795-uart-dma - mediatek,mt8365-uart-dma - mediatek,mt8516-uart-dma - const: mediatek,mt6577-uart-dma -- cgit v1.2.3 From 612fcfdd1a7ccb1968052250f2622de0bdcd513b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 26 Sep 2022 17:03:24 +0200 Subject: dt-bindings: renesas,rcar-dmac: Add r8a779g0 support Document support for the Direct Memory Access Controllers (DMAC) in the Renesas R-Car V4H (R8A779G0) SoC. Signed-off-by: Geert Uytterhoeven Acked-by: Krzysztof Kozlowski Reviewed-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/0a4d40092a51345003742725aea512a815d27e89.1664204526.git.geert+renesas@glider.be Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml index 7202cd68e759..89b591a05bce 100644 --- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml +++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml @@ -45,6 +45,7 @@ properties: - enum: - renesas,dmac-r8a779a0 # R-Car V3U - renesas,dmac-r8a779f0 # R-Car S4-8 + - renesas,dmac-r8a779g0 # R-Car V4H - const: renesas,rcar-gen4-dmac # R-Car Gen4 reg: true -- cgit v1.2.3 From 19ea810e88e08f87d07aafcf82f45084c360ed03 Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Mon, 19 Sep 2022 22:07:21 -0400 Subject: Documentation: devicetree: dma: update the comments remove the double word to. Signed-off-by: Deming Wang Acked-by: Rob Herring Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20220920020721.2190-1-wangdeming@inspur.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt b/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt index b849a1ed389d..47e477cce6d2 100644 --- a/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt +++ b/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt @@ -4,7 +4,7 @@ Required properties: - compatible: "ti,dra7-dma-crossbar" for DRA7xx DMA crossbar "ti,am335x-edma-crossbar" for AM335x and AM437x - reg: Memory map for accessing module -- #dma-cells: Should be set to to match with the DMA controller's dma-cells +- #dma-cells: Should be set to match with the DMA controller's dma-cells for ti,dra7-dma-crossbar and <3> for ti,am335x-edma-crossbar. - dma-requests: Number of DMA requests the crossbar can receive - dma-masters: phandle pointing to the DMA controller -- cgit v1.2.3 From 65add05cfd6e44ea45b2b4e6135745564595cf4f Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Mon, 26 Sep 2022 16:52:00 +0530 Subject: dt-bindings: dma: Make minor fixes to qcom,bam-dma binding doc As a user recently noted, the qcom,bam-dma binding document describes the msm8974 BAM DMA node in the 'example section' incorrectly. Fix the same by making it consistent with the node present inside 'qcom-msm8974' dts file, namely the 'reg' and 'interrupt' values which are incorrect in the 'example section'. While at it also make two additioanal minor cleanups: - mention Bjorn's new email ID in the document, and - add SDM845 in the comment line for the SoCs on which qcom,bam-v1.7.0 version is supported. Signed-off-by: Bhupesh Sharma Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220926112200.1948080-1-bhupesh.sharma@linaro.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml b/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml index 9bf3a1b164f1..003098caf709 100644 --- a/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,bam-dma.yaml @@ -8,7 +8,7 @@ title: Qualcomm Technologies Inc BAM DMA controller maintainers: - Andy Gross - - Bjorn Andersson + - Bjorn Andersson allOf: - $ref: "dma-controller.yaml#" @@ -20,7 +20,7 @@ properties: - qcom,bam-v1.3.0 # MSM8974, APQ8074 and APQ8084 - qcom,bam-v1.4.0 - # MSM8916 + # MSM8916 and SDM845 - qcom,bam-v1.7.0 clocks: @@ -90,8 +90,8 @@ examples: dma-controller@f9944000 { compatible = "qcom,bam-v1.4.0"; - reg = <0xf9944000 0x15000>; - interrupts = ; + reg = <0xf9944000 0x19000>; + interrupts = ; clocks = <&gcc GCC_BLSP2_AHB_CLK>; clock-names = "bam_clk"; #dma-cells = <1>; -- cgit v1.2.3 From b0325aefd398d8b536ba46ee2e5d24252c1b2258 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Sat, 17 Sep 2022 09:12:20 -0700 Subject: dmaengine: idxd: add WQ operation cap restriction support DSA 2.0 add the capability of configuring DMA ops on a per workqueue basis. This means that certain ops can be disabled by the system administrator for certain wq. By default, all ops are available. A bitmap is used to store the ops due to total op size of 256 bits and it is more convenient to use a range list to specify which bits are enabled. One of the usage to support this is for VM migration between different iteration of devices. The newer ops are disabled in order to allow guest to migrate to a host that only support older ops. Another usage is to restrict the WQ to certain operations for QoS of performance. A sysfs of ops_config attribute is added per wq. It is only usable when the ops_config bit is set under WQ_CAP register. This means that this attribute will return -EOPNOTSUPP on DSA 1.x devices. The expected input is a range list for the bits per operation the WQ supports. Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20220917161222.2835172-4-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- Documentation/ABI/stable/sysfs-driver-dma-idxd | 11 ++++ drivers/dma/idxd/device.c | 15 ++++- drivers/dma/idxd/idxd.h | 2 + drivers/dma/idxd/init.c | 10 +++ drivers/dma/idxd/registers.h | 8 ++- drivers/dma/idxd/sysfs.c | 85 ++++++++++++++++++++++++++ 6 files changed, 128 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 0c2b613f2373..3f9f93b5e48c 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -227,6 +227,17 @@ Contact: dmaengine@vger.kernel.org Description: Indicate the number of retires for an enqcmds submission on a sharedwq. A max value to set attribute is capped at 64. +What: /sys/bus/dsa/devices/wq./op_config +Date: Sept 14, 2022 +KernelVersion: 6.0.0 +Contact: dmaengine@vger.kernel.org +Description: Shows the operation capability bits displayed in bitmap format + presented by %*pb printk() output format specifier. + The attribute can be configured when the WQ is disabled in + order to configure the WQ to accept specific bits that + correlates to the operations allowed. It's visible only + on platforms that support the capability. + What: /sys/bus/dsa/devices/engine./group_id Date: Oct 25, 2019 KernelVersion: 5.6.0 diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 88986db57743..df1c108e4bb3 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -391,6 +391,8 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq) memset(wq->name, 0, WQ_NAME_SIZE); wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER; wq->max_batch_size = WQ_DEFAULT_MAX_BATCH; + if (wq->opcap_bmap) + bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); } static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq) @@ -809,7 +811,7 @@ static int idxd_wq_config_write(struct idxd_wq *wq) struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; u32 wq_offset; - int i; + int i, n; if (!wq->group) return 0; @@ -867,6 +869,17 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes); wq->wqcfg->max_batch_shift = ilog2(wq->max_batch_size); + /* bytes 32-63 */ + if (idxd->hw.wq_cap.op_config && wq->opcap_bmap) { + memset(wq->wqcfg->op_config, 0, IDXD_MAX_OPCAP_BITS / 8); + for_each_set_bit(n, wq->opcap_bmap, IDXD_MAX_OPCAP_BITS) { + int pos = n % BITS_PER_LONG_LONG; + int idx = n / BITS_PER_LONG_LONG; + + wq->wqcfg->op_config[idx] |= BIT(pos); + } + } + dev_dbg(dev, "WQ %d CFGs\n", wq->id); for (i = 0; i < WQCFG_STRIDES(idxd); i++) { wq_offset = WQCFG_OFFSET(idxd, wq->id, i); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index fd0642b8cadc..ba6e94f0cd6e 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -196,6 +196,8 @@ struct idxd_wq { enum idxd_wq_state state; unsigned long flags; union wqcfg *wqcfg; + unsigned long *opcap_bmap; + struct dsa_hw_desc **hw_descs; int num_descs; union { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index f6f0654a79b8..2b18d512cbfc 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -191,6 +191,16 @@ static int idxd_setup_wqs(struct idxd_device *idxd) rc = -ENOMEM; goto err; } + + if (idxd->hw.wq_cap.op_config) { + wq->opcap_bmap = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL); + if (!wq->opcap_bmap) { + put_device(conf_dev); + rc = -ENOMEM; + goto err; + } + bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); + } idxd->wqs[i] = wq; } diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 4c96ea85f843..7b95be8f0f64 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -54,7 +54,8 @@ union wq_cap_reg { u64 priority:1; u64 occupancy:1; u64 occupancy_int:1; - u64 rsvd3:10; + u64 op_config:1; + u64 rsvd3:9; }; u64 bits; } __packed; @@ -350,8 +351,11 @@ union wqcfg { /* bytes 28-31 */ u32 rsvd8; + + /* bytes 32-63 */ + u64 op_config[4]; }; - u32 bits[8]; + u32 bits[16]; } __packed; #define WQCFG_PASID_IDX 2 diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 57aeeee835ab..6dbdd74e5bae 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1058,6 +1058,68 @@ static ssize_t wq_enqcmds_retries_store(struct device *dev, struct device_attrib static struct device_attribute dev_attr_wq_enqcmds_retries = __ATTR(enqcmds_retries, 0644, wq_enqcmds_retries_show, wq_enqcmds_retries_store); +static ssize_t wq_op_config_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%*pb\n", IDXD_MAX_OPCAP_BITS, wq->opcap_bmap); +} + +static int idxd_verify_supported_opcap(struct idxd_device *idxd, unsigned long *opmask) +{ + int bit; + + /* + * The OPCAP is defined as 256 bits that represents each operation the device + * supports per bit. Iterate through all the bits and check if the input mask + * is set for bits that are not set in the OPCAP for the device. If no OPCAP + * bit is set and input mask has the bit set, then return error. + */ + for_each_set_bit(bit, opmask, IDXD_MAX_OPCAP_BITS) { + if (!test_bit(bit, idxd->opcap_bmap)) + return -EINVAL; + } + + return 0; +} + +static ssize_t wq_op_config_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + unsigned long *opmask; + int rc; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + opmask = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL); + if (!opmask) + return -ENOMEM; + + rc = bitmap_parse(buf, count, opmask, IDXD_MAX_OPCAP_BITS); + if (rc < 0) + goto err; + + rc = idxd_verify_supported_opcap(idxd, opmask); + if (rc < 0) + goto err; + + bitmap_copy(wq->opcap_bmap, opmask, IDXD_MAX_OPCAP_BITS); + + bitmap_free(opmask); + return count; + +err: + bitmap_free(opmask); + return rc; +} + +static struct device_attribute dev_attr_wq_op_config = + __ATTR(op_config, 0644, wq_op_config_show, wq_op_config_store); + static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_clients.attr, &dev_attr_wq_state.attr, @@ -1075,11 +1137,33 @@ static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_ats_disable.attr, &dev_attr_wq_occupancy.attr, &dev_attr_wq_enqcmds_retries.attr, + &dev_attr_wq_op_config.attr, NULL, }; +static bool idxd_wq_attr_op_config_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return attr == &dev_attr_wq_op_config.attr && + !idxd->hw.wq_cap.op_config; +} + +static umode_t idxd_wq_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + + if (idxd_wq_attr_op_config_invisible(attr, idxd)) + return 0; + + return attr->mode; +} + static const struct attribute_group idxd_wq_attribute_group = { .attrs = idxd_wq_attributes, + .is_visible = idxd_wq_attr_visible, }; static const struct attribute_group *idxd_wq_attribute_groups[] = { @@ -1091,6 +1175,7 @@ static void idxd_conf_wq_release(struct device *dev) { struct idxd_wq *wq = confdev_to_wq(dev); + bitmap_free(wq->opcap_bmap); kfree(wq->wqcfg); kfree(wq); } -- cgit v1.2.3 From 1f2737521af2b7d018971f1d873856fff02d2b33 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Sat, 17 Sep 2022 09:12:21 -0700 Subject: dmaengine: idxd: add configuration for concurrent work descriptor processing Add sysfs knob to allow control of the number of work descriptors that can be concurrently processed by an engine in the group as a fraction of the Maximum Work Descriptors in Progress value specified in ENGCAP register. This control knob is part of toggle for QoS control. Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20220917161222.2835172-5-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- Documentation/ABI/stable/sysfs-driver-dma-idxd | 12 ++++++ drivers/dma/idxd/device.c | 13 ++++--- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/registers.h | 23 ++++++----- drivers/dma/idxd/sysfs.c | 53 ++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 15 deletions(-) (limited to 'Documentation') diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 3f9f93b5e48c..02a721a8ea68 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -266,3 +266,15 @@ Contact: dmaengine@vger.kernel.org Description: Indicates the number of Read Buffers reserved for the use of engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read Buffers Reserved. + +What: /sys/bus/dsa/devices/group./desc_progress_limit +Date: Sept 14, 2022 +KernelVersion: 6.0.0 +Contact: dmaengine@vger.kernel.org +Description: Allows control of the number of work descriptors that can be + concurrently processed by an engine in the group as a fraction + of the Maximum Work Descriptors in Progress value specified in + the ENGCAP register. The acceptable values are 0 (default), + 1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of + the max value). It's visible only on platforms that support + the capability. diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index df1c108e4bb3..05a982e143fe 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -709,6 +709,7 @@ static void idxd_groups_clear_state(struct idxd_device *idxd) group->tc_a = -1; group->tc_b = -1; } + group->desc_progress_limit = 0; } } @@ -765,10 +766,10 @@ static void idxd_group_config_write(struct idxd_group *group) /* setup GRPFLAGS */ grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id); - iowrite32(group->grpcfg.flags.bits, idxd->reg_base + grpcfg_offset); - dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n", + iowrite64(group->grpcfg.flags.bits, idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#llx\n", group->id, grpcfg_offset, - ioread32(idxd->reg_base + grpcfg_offset)); + ioread64(idxd->reg_base + grpcfg_offset)); } static int idxd_groups_config_write(struct idxd_device *idxd) @@ -929,6 +930,8 @@ static void idxd_group_flags_setup(struct idxd_device *idxd) group->grpcfg.flags.rdbufs_allowed = group->rdbufs_allowed; else group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs; + + group->grpcfg.flags.desc_progress_limit = group->desc_progress_limit; } } @@ -1111,8 +1114,8 @@ static void idxd_group_load_config(struct idxd_group *group) } grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id); - group->grpcfg.flags.bits = ioread32(idxd->reg_base + grpcfg_offset); - dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n", + group->grpcfg.flags.bits = ioread64(idxd->reg_base + grpcfg_offset); + dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#llx\n", group->id, grpcfg_offset, group->grpcfg.flags.bits); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index ba6e94f0cd6e..7ee870e5ca67 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -96,6 +96,7 @@ struct idxd_group { u8 rdbufs_reserved; int tc_a; int tc_b; + int desc_progress_limit; }; struct idxd_pmu { diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 7b95be8f0f64..2cc2543edd58 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -68,7 +68,8 @@ union group_cap_reg { u64 total_rdbufs:8; /* formerly total_tokens */ u64 rdbuf_ctrl:1; /* formerly token_en */ u64 rdbuf_limit:1; /* formerly token_limit */ - u64 rsvd:46; + u64 progress_limit:1; /* descriptor and batch descriptor */ + u64 rsvd:45; }; u64 bits; } __packed; @@ -288,16 +289,18 @@ union msix_perm { union group_flags { struct { - u32 tc_a:3; - u32 tc_b:3; - u32 rsvd:1; - u32 use_rdbuf_limit:1; - u32 rdbufs_reserved:8; - u32 rsvd2:4; - u32 rdbufs_allowed:8; - u32 rsvd3:4; + u64 tc_a:3; + u64 tc_b:3; + u64 rsvd:1; + u64 use_rdbuf_limit:1; + u64 rdbufs_reserved:8; + u64 rsvd2:4; + u64 rdbufs_allowed:8; + u64 rsvd3:4; + u64 desc_progress_limit:2; + u64 rsvd4:30; }; - u32 bits; + u64 bits; } __packed; struct grpcfg { diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 6dbdd74e5bae..3624bdeb71f6 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -443,6 +443,37 @@ static struct device_attribute dev_attr_group_traffic_class_b = __ATTR(traffic_class_b, 0644, group_traffic_class_b_show, group_traffic_class_b_store); +static ssize_t group_desc_progress_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%d\n", group->desc_progress_limit); +} + +static ssize_t group_desc_progress_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_group *group = confdev_to_group(dev); + int val, rc; + + rc = kstrtoint(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (val & ~GENMASK(1, 0)) + return -EINVAL; + + group->desc_progress_limit = val; + return count; +} + +static struct device_attribute dev_attr_group_desc_progress_limit = + __ATTR(desc_progress_limit, 0644, group_desc_progress_limit_show, + group_desc_progress_limit_store); + static struct attribute *idxd_group_attributes[] = { &dev_attr_group_work_queues.attr, &dev_attr_group_engines.attr, @@ -454,11 +485,33 @@ static struct attribute *idxd_group_attributes[] = { &dev_attr_group_read_buffers_reserved.attr, &dev_attr_group_traffic_class_a.attr, &dev_attr_group_traffic_class_b.attr, + &dev_attr_group_desc_progress_limit.attr, NULL, }; +static bool idxd_group_attr_progress_limit_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return attr == &dev_attr_group_desc_progress_limit.attr && + !idxd->hw.group_cap.progress_limit; +} + +static umode_t idxd_group_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct idxd_group *group = confdev_to_group(dev); + struct idxd_device *idxd = group->idxd; + + if (idxd_group_attr_progress_limit_invisible(attr, idxd)) + return 0; + + return attr->mode; +} + static const struct attribute_group idxd_group_attribute_group = { .attrs = idxd_group_attributes, + .is_visible = idxd_group_attr_visible, }; static const struct attribute_group *idxd_group_attribute_groups[] = { -- cgit v1.2.3 From 7ca68fa3c8ab83dfa539f16c5b4b1aec2e33320d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Sat, 17 Sep 2022 09:12:22 -0700 Subject: dmaengine: idxd: add configuration for concurrent batch descriptor processing Add sysfs knob to allow control of the number of batch descriptors that can be concurrently processed by an engine in the group as a fraction of the Maximum Work Descriptors in Progress value specfied in ENGCAP register. This control knob is part of toggle for QoS control. Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20220917161222.2835172-6-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- Documentation/ABI/stable/sysfs-driver-dma-idxd | 12 +++++++++ drivers/dma/idxd/device.c | 2 ++ drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/registers.h | 4 ++- drivers/dma/idxd/sysfs.c | 36 ++++++++++++++++++++++++-- 5 files changed, 52 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 02a721a8ea68..8e2c2c405db2 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -278,3 +278,15 @@ Description: Allows control of the number of work descriptors that can be 1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of the max value). It's visible only on platforms that support the capability. + +What: /sys/bus/dsa/devices/group./batch_progress_limit +Date: Sept 14, 2022 +KernelVersion: 6.0.0 +Contact: dmaengine@vger.kernel.org +Description: Allows control of the number of batch descriptors that can be + concurrently processed by an engine in the group as a fraction + of the Maximum Batch Descriptors in Progress value specified in + the ENGCAP register. The acceptable values are 0 (default), + 1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of + the max value). It's visible only on platforms that support + the capability. diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 05a982e143fe..2c1e6f6daa62 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -710,6 +710,7 @@ static void idxd_groups_clear_state(struct idxd_device *idxd) group->tc_b = -1; } group->desc_progress_limit = 0; + group->batch_progress_limit = 0; } } @@ -932,6 +933,7 @@ static void idxd_group_flags_setup(struct idxd_device *idxd) group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs; group->grpcfg.flags.desc_progress_limit = group->desc_progress_limit; + group->grpcfg.flags.batch_progress_limit = group->batch_progress_limit; } } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 7ee870e5ca67..1196ab342f01 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -97,6 +97,7 @@ struct idxd_group { int tc_a; int tc_b; int desc_progress_limit; + int batch_progress_limit; }; struct idxd_pmu { diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 2cc2543edd58..fe3b8d04f9db 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -298,7 +298,9 @@ union group_flags { u64 rdbufs_allowed:8; u64 rsvd3:4; u64 desc_progress_limit:2; - u64 rsvd4:30; + u64 rsvd4:2; + u64 batch_progress_limit:2; + u64 rsvd5:26; }; u64 bits; } __packed; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 3624bdeb71f6..bdaccf9e0436 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -474,6 +474,36 @@ static struct device_attribute dev_attr_group_desc_progress_limit = __ATTR(desc_progress_limit, 0644, group_desc_progress_limit_show, group_desc_progress_limit_store); +static ssize_t group_batch_progress_limit_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct idxd_group *group = confdev_to_group(dev); + + return sysfs_emit(buf, "%d\n", group->batch_progress_limit); +} + +static ssize_t group_batch_progress_limit_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_group *group = confdev_to_group(dev); + int val, rc; + + rc = kstrtoint(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (val & ~GENMASK(1, 0)) + return -EINVAL; + + group->batch_progress_limit = val; + return count; +} + +static struct device_attribute dev_attr_group_batch_progress_limit = + __ATTR(batch_progress_limit, 0644, group_batch_progress_limit_show, + group_batch_progress_limit_store); static struct attribute *idxd_group_attributes[] = { &dev_attr_group_work_queues.attr, &dev_attr_group_engines.attr, @@ -486,14 +516,16 @@ static struct attribute *idxd_group_attributes[] = { &dev_attr_group_traffic_class_a.attr, &dev_attr_group_traffic_class_b.attr, &dev_attr_group_desc_progress_limit.attr, + &dev_attr_group_batch_progress_limit.attr, NULL, }; static bool idxd_group_attr_progress_limit_invisible(struct attribute *attr, struct idxd_device *idxd) { - return attr == &dev_attr_group_desc_progress_limit.attr && - !idxd->hw.group_cap.progress_limit; + return (attr == &dev_attr_group_desc_progress_limit.attr || + attr == &dev_attr_group_batch_progress_limit.attr) && + !idxd->hw.group_cap.progress_limit; } static umode_t idxd_group_attr_visible(struct kobject *kobj, -- cgit v1.2.3 From 41742afd34b7ac354ec354a3efa3651e486d8c54 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Fri, 16 Sep 2022 16:25:41 +0200 Subject: dt-bindings: dma: apple,admac: Add iommus and power-domains properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apple's ADMAC is on all supported Apple silicon SoCs behind an IOMMU and has its own power-domain. Signed-off-by: Janne Grunau Acked-by: Martin PoviĊĦer Acked-by: Krzysztof Kozlowski Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20220916142550.269905-2-j@jannau.net Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/apple,admac.yaml | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/dma/apple,admac.yaml b/Documentation/devicetree/bindings/dma/apple,admac.yaml index bdc8c129c4f5..3b1e667f7ea0 100644 --- a/Documentation/devicetree/bindings/dma/apple,admac.yaml +++ b/Documentation/devicetree/bindings/dma/apple,admac.yaml @@ -49,6 +49,13 @@ properties: in an interrupts-extended list the disconnected positions will contain an empty phandle reference <0>. + iommus: + minItems: 1 + maxItems: 2 + + power-domains: + maxItems: 1 + required: - compatible - reg -- cgit v1.2.3 From 84641a1e32cbbabfe9a808b4df79f75ed4c88576 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Wed, 14 Sep 2022 16:04:25 +0200 Subject: dt-bindings: dma: rework qcom,adm Documentation to yaml schema Rework the qcom,adm Documentation to yaml schema. This is not a pure conversion since originally the driver has changed implementation for the #dma-cells and was wrong from the start. Also the driver now handles the common DMA clients implementation with the first cell that denotes the channel number and nothing else since the client will have to provide the crci information via other means. Signed-off-by: Christian Marangi Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20220914140426.7609-1-ansuelsmth@gmail.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/dma/qcom,adm.yaml | 96 ++++++++++++++++++++++ Documentation/devicetree/bindings/dma/qcom_adm.txt | 61 -------------- 2 files changed, 96 insertions(+), 61 deletions(-) create mode 100644 Documentation/devicetree/bindings/dma/qcom,adm.yaml delete mode 100644 Documentation/devicetree/bindings/dma/qcom_adm.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/dma/qcom,adm.yaml b/Documentation/devicetree/bindings/dma/qcom,adm.yaml new file mode 100644 index 000000000000..6c08245bf5d5 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/qcom,adm.yaml @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/qcom,adm.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm ADM DMA Controller + +maintainers: + - Christian Marangi + - Bjorn Andersson + +description: | + QCOM ADM DMA controller provides DMA capabilities for + peripheral buses such as NAND and SPI. + +properties: + compatible: + const: qcom,adm + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + "#dma-cells": + const: 1 + + clocks: + items: + - description: phandle to the core clock + - description: phandle to the iface clock + + clock-names: + items: + - const: core + - const: iface + + resets: + items: + - description: phandle to the clk reset + - description: phandle to the c0 reset + - description: phandle to the c1 reset + - description: phandle to the c2 reset + + reset-names: + items: + - const: clk + - const: c0 + - const: c1 + - const: c2 + + qcom,ee: + $ref: /schemas/types.yaml#/definitions/uint32 + description: indicates the security domain identifier used in the secure world. + minimum: 0 + maximum: 255 + +required: + - compatible + - reg + - interrupts + - "#dma-cells" + - clocks + - clock-names + - resets + - reset-names + - qcom,ee + +additionalProperties: false + +examples: + - | + #include + #include + + adm_dma: dma-controller@18300000 { + compatible = "qcom,adm"; + reg = <0x18300000 0x100000>; + interrupts = <0 170 0>; + #dma-cells = <1>; + + clocks = <&gcc ADM0_CLK>, + <&gcc ADM0_PBUS_CLK>; + clock-names = "core", "iface"; + + resets = <&gcc ADM0_RESET>, + <&gcc ADM0_C0_RESET>, + <&gcc ADM0_C1_RESET>, + <&gcc ADM0_C2_RESET>; + reset-names = "clk", "c0", "c1", "c2"; + qcom,ee = <0>; + }; + +... diff --git a/Documentation/devicetree/bindings/dma/qcom_adm.txt b/Documentation/devicetree/bindings/dma/qcom_adm.txt deleted file mode 100644 index 9d3b2f917b7b..000000000000 --- a/Documentation/devicetree/bindings/dma/qcom_adm.txt +++ /dev/null @@ -1,61 +0,0 @@ -QCOM ADM DMA Controller - -Required properties: -- compatible: must contain "qcom,adm" for IPQ/APQ8064 and MSM8960 -- reg: Address range for DMA registers -- interrupts: Should contain one interrupt shared by all channels -- #dma-cells: must be <2>. First cell denotes the channel number. Second cell - denotes CRCI (client rate control interface) flow control assignment. -- clocks: Should contain the core clock and interface clock. -- clock-names: Must contain "core" for the core clock and "iface" for the - interface clock. -- resets: Must contain an entry for each entry in reset names. -- reset-names: Must include the following entries: - - clk - - c0 - - c1 - - c2 -- qcom,ee: indicates the security domain identifier used in the secure world. - -Example: - adm_dma: dma@18300000 { - compatible = "qcom,adm"; - reg = <0x18300000 0x100000>; - interrupts = <0 170 0>; - #dma-cells = <2>; - - clocks = <&gcc ADM0_CLK>, <&gcc ADM0_PBUS_CLK>; - clock-names = "core", "iface"; - - resets = <&gcc ADM0_RESET>, - <&gcc ADM0_C0_RESET>, - <&gcc ADM0_C1_RESET>, - <&gcc ADM0_C2_RESET>; - reset-names = "clk", "c0", "c1", "c2"; - qcom,ee = <0>; - }; - -DMA clients must use the format descripted in the dma.txt file, using a three -cell specifier for each channel. - -Each dmas request consists of 3 cells: - 1. phandle pointing to the DMA controller - 2. channel number - 3. CRCI assignment, if applicable. If no CRCI flow control is required, use 0. - The CRCI is used for flow control. It identifies the peripheral device that - is the source/destination for the transferred data. - -Example: - - spi4: spi@1a280000 { - spi-max-frequency = <50000000>; - - pinctrl-0 = <&spi_pins>; - pinctrl-names = "default"; - - cs-gpios = <&qcom_pinmux 20 0>; - - dmas = <&adm_dma 6 9>, - <&adm_dma 5 10>; - dma-names = "rx", "tx"; - }; -- cgit v1.2.3 From 41d8ffd7cb2add394a2626f12357770846abcf1e Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Wed, 14 Sep 2022 16:04:26 +0200 Subject: dt-bindings: dma: add additional pbus reset to qcom,adm qcom,adm require an additional reset for the pbus line. Add this missing reset to match the current implementation on ipq806x.dtsi. Signed-off-by: Christian Marangi Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20220914140426.7609-2-ansuelsmth@gmail.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,adm.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/dma/qcom,adm.yaml b/Documentation/devicetree/bindings/dma/qcom,adm.yaml index 6c08245bf5d5..6a9d7bc74aff 100644 --- a/Documentation/devicetree/bindings/dma/qcom,adm.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,adm.yaml @@ -40,6 +40,7 @@ properties: resets: items: - description: phandle to the clk reset + - description: phandle to the pbus reset - description: phandle to the c0 reset - description: phandle to the c1 reset - description: phandle to the c2 reset @@ -47,6 +48,7 @@ properties: reset-names: items: - const: clk + - const: pbus - const: c0 - const: c1 - const: c2 @@ -86,10 +88,11 @@ examples: clock-names = "core", "iface"; resets = <&gcc ADM0_RESET>, + <&gcc ADM0_PBUS_RESET>, <&gcc ADM0_C0_RESET>, <&gcc ADM0_C1_RESET>, <&gcc ADM0_C2_RESET>; - reset-names = "clk", "c0", "c1", "c2"; + reset-names = "clk", "pbus", "c0", "c1", "c2"; qcom,ee = <0>; }; -- cgit v1.2.3