From eb746180132a555da8cfb02d98cb6d0a9d58e870 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Thu, 17 Aug 2023 00:23:34 +0100
Subject: riscv: dma-mapping: only invalidate after DMA, not flush

No other architecture intentionally writes back dirty cache lines into
a buffer that a device has just finished writing into. If the cache is
clean, this has no effect at all, but if a cacheline in the buffer has
actually been written by the CPU, there is a driver bug that is likely
made worse by overwriting that buffer.

Signed-off-by: Arnd Bergmann
Reviewed-by: Conor Dooley
Reviewed-by: Lad Prabhakar
Acked-by: Palmer Dabbelt
Signed-off-by: Lad Prabhakar
Link: https://lore.kernel.org/r/20230816232336.164413-2-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/mm/dma-noncoherent.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c
index d51a75864e53..94614cf61cdd 100644
--- a/arch/riscv/mm/dma-noncoherent.c
+++ b/arch/riscv/mm/dma-noncoherent.c
@@ -42,7 +42,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
 		break;
 	case DMA_FROM_DEVICE:
 	case DMA_BIDIRECTIONAL:
-		ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
+		ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size);
 		break;
 	default:
 		break;
--
cgit v1.2.3


From 482069ebdc1d61fba14527055894b9f4f0ced08c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Thu, 17 Aug 2023 00:23:35 +0100
Subject: riscv: dma-mapping: skip invalidation before bidirectional DMA

For a DMA_BIDIRECTIONAL transfer, the caches have to be cleaned first
to let the device see data written by the CPU, and invalidated after
the transfer to let the CPU see data written by the device. riscv also
invalidates the caches before the transfer, which does not appear to
serve any purpose.

Signed-off-by: Arnd Bergmann
Reviewed-by: Conor Dooley
Reviewed-by: Lad Prabhakar
Acked-by: Palmer Dabbelt
Acked-by: Guo Ren
Signed-off-by: Lad Prabhakar
Link: https://lore.kernel.org/r/20230816232336.164413-3-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/mm/dma-noncoherent.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c
index 94614cf61cdd..fc6377a64c8d 100644
--- a/arch/riscv/mm/dma-noncoherent.c
+++ b/arch/riscv/mm/dma-noncoherent.c
@@ -25,7 +25,7 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
 		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
 		break;
 	case DMA_BIDIRECTIONAL:
-		ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
+		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
 		break;
 	default:
 		break;
--
cgit v1.2.3


From 935730160738a478dbf4b61cf0cfc29c1442fb4e Mon Sep 17 00:00:00 2001
From: Lad Prabhakar
Date: Thu, 17 Aug 2023 00:23:36 +0100
Subject: riscv: dma-mapping: switch over to generic implementation

Add helper functions for cache wback/inval/clean and use them in the
arch_sync_dma_for_device()/arch_sync_dma_for_cpu() functions.

The proposed changes are in preparation for switching over to the
generic implementation. The reorganization of the code is based on the
patch (Link[0]) from Arnd.

For now I have dropped the CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU check, as
this will be enabled by default upon selection of
RISCV_DMA_NONCOHERENT, and also dropped arch_dma_mark_dcache_clean().

Link[0]: https://lore.kernel.org/all/20230327121317.4081816-22-arnd@kernel.org/

Signed-off-by: Arnd Bergmann
Signed-off-by: Lad Prabhakar
Link: https://lore.kernel.org/r/20230816232336.164413-4-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/mm/dma-noncoherent.c | 60 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 9 deletions(-)

diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c
index fc6377a64c8d..06b8fea58e20 100644
--- a/arch/riscv/mm/dma-noncoherent.c
+++ b/arch/riscv/mm/dma-noncoherent.c
@@ -12,21 +12,61 @@ static bool noncoherent_supported __ro_after_init;
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-			      enum dma_data_direction dir)
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
+{
+	void *vaddr = phys_to_virt(paddr);
+
+	ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
+}
+
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
+{
+	void *vaddr = phys_to_virt(paddr);
+
+	ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size);
+}
+
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
 {
 	void *vaddr = phys_to_virt(paddr);
 
+	ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
+}
+
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+	return true;
+}
+
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+	return true;
+}
+
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+			      enum dma_data_direction dir)
+{
 	switch (dir) {
 	case DMA_TO_DEVICE:
-		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
+		arch_dma_cache_wback(paddr, size);
 		break;
+
 	case DMA_FROM_DEVICE:
-		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
-		break;
+		if (!arch_sync_dma_clean_before_fromdevice()) {
+			arch_dma_cache_inv(paddr, size);
+			break;
+		}
+		fallthrough;
+
 	case DMA_BIDIRECTIONAL:
-		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
+		/* Skip the invalidate here if it's done later */
+		if (IS_ENABLED(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) &&
+		    arch_sync_dma_cpu_needs_post_dma_flush())
+			arch_dma_cache_wback(paddr, size);
+		else
+			arch_dma_cache_wback_inv(paddr, size);
 		break;
+
 	default:
 		break;
 	}
@@ -35,15 +75,17 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
 			   enum dma_data_direction dir)
 {
-	void *vaddr = phys_to_virt(paddr);
-
 	switch (dir) {
 	case DMA_TO_DEVICE:
 		break;
+
 	case DMA_FROM_DEVICE:
 	case DMA_BIDIRECTIONAL:
-		ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size);
+		/* FROM_DEVICE invalidate needed if speculative CPU prefetch only */
+		if (arch_sync_dma_cpu_needs_post_dma_flush())
+			arch_dma_cache_inv(paddr, size);
 		break;
+
 	default:
 		break;
 	}
--
cgit v1.2.3
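
A minimal usage sketch of how a driver's streaming-DMA calls reach the arch
hooks changed by this series, assuming the direct-mapping path on a
noncoherent RISC-V platform; "example_rx", "dev", "buf" and "len" are
hypothetical names, and only the generic dma_map_single()/dma_unmap_single()
API is assumed:

#include <linux/device.h>
#include <linux/dma-mapping.h>

/* Hypothetical driver receive path for a device that writes into buf. */
static int example_rx(struct device *dev, void *buf, size_t len)
{
	dma_addr_t dma;

	/*
	 * Hand the buffer to the device. On a noncoherent platform this
	 * reaches arch_sync_dma_for_device(..., DMA_FROM_DEVICE), which
	 * cleans (writes back) the buffer before the device writes into it.
	 */
	dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
	if (dma_mapping_error(dev, dma))
		return -ENOMEM;

	/* ... start the device and wait for the DMA to complete ... */

	/*
	 * Hand the buffer back to the CPU. This reaches
	 * arch_sync_dma_for_cpu(..., DMA_FROM_DEVICE), which after this
	 * series only invalidates (no writeback), so data written by the
	 * device is not overwritten by stale CPU cache lines.
	 */
	dma_unmap_single(dev, dma, len, DMA_FROM_DEVICE);

	return 0;
}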