-rw-r--r-- Documentation/IPMI.txt | 7
-rw-r--r-- Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png | bin 0 -> 100825 bytes
-rw-r--r-- Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg | 374
-rw-r--r-- Documentation/RCU/Design/Requirements/RCUApplicability.svg | 237
-rw-r--r-- Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg | 639
-rw-r--r-- Documentation/RCU/Design/Requirements/Requirements.html | 2897
-rw-r--r-- Documentation/RCU/Design/Requirements/Requirements.htmlx | 2741
-rwxr-xr-x Documentation/RCU/Design/htmlqqz.sh | 108
-rw-r--r-- Documentation/arm/keystone/Overview.txt | 18
-rw-r--r-- Documentation/block/null_blk.txt | 3
-rw-r--r-- Documentation/devicetree/bindings/dma/ti-edma.txt | 10
-rw-r--r-- Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt | 4
-rw-r--r-- Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt | 2
-rw-r--r-- Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-nmi.txt (renamed from Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt) | 2
-rw-r--r-- Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt | 1
-rw-r--r-- Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt | 74
-rw-r--r-- Documentation/devicetree/bindings/interrupt-controller/technologic,ts4800.txt | 16
-rw-r--r-- Documentation/devicetree/bindings/mtd/partition.txt | 7
-rw-r--r-- Documentation/devicetree/bindings/net/cpsw.txt | 6
-rw-r--r-- Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt | 6
-rw-r--r-- Documentation/devicetree/bindings/thermal/rockchip-thermal.txt | 4
-rw-r--r-- Documentation/filesystems/Locking | 6
-rw-r--r-- Documentation/filesystems/porting | 17
-rw-r--r-- Documentation/filesystems/vfs.txt | 21
-rw-r--r-- Documentation/i2c/busses/i2c-i801 | 1
-rw-r--r-- Documentation/kernel-parameters.txt | 50
-rw-r--r-- Documentation/memory-barriers.txt | 12
-rw-r--r-- Documentation/networking/e100.txt | 14
-rw-r--r-- Documentation/sysctl/kernel.txt | 15
-rw-r--r-- Documentation/trace/events-msr.txt | 37
-rw-r--r-- Documentation/trace/postprocess/decode_msr.py | 37
-rw-r--r-- MAINTAINERS | 85
-rw-r--r-- Makefile | 2
-rw-r--r-- arch/arc/Kconfig | 1
-rw-r--r-- arch/arc/Makefile | 2
-rw-r--r-- arch/arc/boot/dts/axs10x_mb.dtsi | 1
-rw-r--r-- arch/arc/boot/dts/nsim_hs.dts | 3
-rw-r--r-- arch/arc/configs/axs101_defconfig | 2
-rw-r--r-- arch/arc/configs/axs103_defconfig | 2
-rw-r--r-- arch/arc/configs/axs103_smp_defconfig | 2
-rw-r--r-- arch/arc/configs/nsim_hs_defconfig | 2
-rw-r--r-- arch/arc/configs/nsim_hs_smp_defconfig | 2
-rw-r--r-- arch/arc/configs/nsimosci_hs_defconfig | 2
-rw-r--r-- arch/arc/configs/nsimosci_hs_smp_defconfig | 2
-rw-r--r-- arch/arc/configs/vdk_hs38_defconfig | 2
-rw-r--r-- arch/arc/configs/vdk_hs38_smp_defconfig | 2
-rw-r--r-- arch/arc/include/asm/cache.h | 2
-rw-r--r-- arch/arc/include/asm/irqflags-arcv2.h | 3
-rw-r--r-- arch/arc/include/asm/irqflags-compact.h | 2
-rw-r--r-- arch/arc/include/asm/mach_desc.h | 4
-rw-r--r-- arch/arc/include/asm/smp.h | 4
-rw-r--r-- arch/arc/include/asm/unwind.h | 4
-rw-r--r-- arch/arc/kernel/ctx_sw.c | 2
-rw-r--r-- arch/arc/kernel/ctx_sw_asm.S | 3
-rw-r--r-- arch/arc/kernel/intc-arcv2.c | 15
-rw-r--r-- arch/arc/kernel/irq.c | 33
-rw-r--r-- arch/arc/kernel/mcip.c | 2
-rw-r--r-- arch/arc/kernel/perf_event.c | 32
-rw-r--r-- arch/arc/kernel/process.c | 9
-rw-r--r-- arch/arc/kernel/setup.c | 1
-rw-r--r-- arch/arc/kernel/smp.c | 8
-rw-r--r-- arch/arc/kernel/unwind.c | 94
-rw-r--r-- arch/arc/mm/highmem.c | 4
-rw-r--r-- arch/arc/mm/init.c | 4
-rw-r--r-- arch/arc/mm/tlb.c | 4
-rw-r--r-- arch/arm/Kconfig | 7
-rw-r--r-- arch/arm/boot/dts/am4372.dtsi | 4
-rw-r--r-- arch/arm/boot/dts/am43xx-clocks.dtsi | 8
-rw-r--r-- arch/arm/boot/dts/am57xx-beagle-x15.dts | 1
-rw-r--r-- arch/arm/boot/dts/animeo_ip.dts | 6
-rw-r--r-- arch/arm/boot/dts/armada-38x.dtsi | 1
-rw-r--r-- arch/arm/boot/dts/at91-foxg20.dts | 2
-rw-r--r-- arch/arm/boot/dts/at91-kizbox.dts | 13
-rw-r--r-- arch/arm/boot/dts/at91-kizbox2.dts | 6
-rw-r--r-- arch/arm/boot/dts/at91-kizboxmini.dts | 4
-rw-r--r-- arch/arm/boot/dts/at91-qil_a9260.dts | 2
-rw-r--r-- arch/arm/boot/dts/at91-sama5d2_xplained.dts | 116
-rw-r--r-- arch/arm/boot/dts/at91-sama5d3_xplained.dts | 2
-rw-r--r-- arch/arm/boot/dts/at91-sama5d4_xplained.dts | 12
-rw-r--r-- arch/arm/boot/dts/at91-sama5d4ek.dts | 12
-rw-r--r-- arch/arm/boot/dts/at91rm9200ek.dts | 9
-rw-r--r-- arch/arm/boot/dts/at91sam9261ek.dts | 19
-rw-r--r-- arch/arm/boot/dts/at91sam9263ek.dts | 13
-rw-r--r-- arch/arm/boot/dts/at91sam9g20ek_common.dtsi | 13
-rw-r--r-- arch/arm/boot/dts/at91sam9m10g45ek.dts | 13
-rw-r--r-- arch/arm/boot/dts/at91sam9n12ek.dts | 11
-rw-r--r-- arch/arm/boot/dts/at91sam9rlek.dts | 13
-rw-r--r-- arch/arm/boot/dts/at91sam9x5cm.dtsi | 11
-rw-r--r-- arch/arm/boot/dts/berlin2q.dtsi | 8
-rw-r--r-- arch/arm/boot/dts/dm816x.dtsi | 8
-rw-r--r-- arch/arm/boot/dts/dra7.dtsi | 4
-rw-r--r-- arch/arm/boot/dts/imx27.dtsi | 16
-rw-r--r-- arch/arm/boot/dts/imx6q-gw5400-a.dts | 2
-rw-r--r-- arch/arm/boot/dts/imx6qdl-gw51xx.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/imx6qdl-gw52xx.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/imx6qdl-gw53xx.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/imx6qdl-gw54xx.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/imx6qdl-sabreauto.dtsi | 6
-rw-r--r-- arch/arm/boot/dts/k2l-netcp.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/kirkwood-ts219.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/omap4-duovero-parlor.dts | 4
-rw-r--r-- arch/arm/boot/dts/rk3288-veyron-minnie.dts | 4
-rw-r--r-- arch/arm/boot/dts/rk3288.dtsi | 10
-rw-r--r-- arch/arm/boot/dts/sama5d35ek.dts | 2
-rw-r--r-- arch/arm/boot/dts/sama5d4.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/ste-nomadik-stn8815.dtsi | 6
-rw-r--r-- arch/arm/boot/dts/sun6i-a31s-primo81.dts | 1
-rw-r--r-- arch/arm/boot/dts/tegra124-nyan.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/usb_a9260_common.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/usb_a9263.dts | 2
-rw-r--r-- arch/arm/boot/dts/versatile-ab.dts | 10
-rw-r--r-- arch/arm/boot/dts/versatile-pb.dts | 20
-rw-r--r-- arch/arm/boot/dts/vf610-colibri.dtsi | 5
-rw-r--r-- arch/arm/boot/dts/vf610.dtsi | 2
-rw-r--r-- arch/arm/boot/dts/vfxxx.dtsi | 14
-rw-r--r-- arch/arm/boot/dts/wm8650.dtsi | 9
-rw-r--r-- arch/arm/configs/at91_dt_defconfig | 1
-rw-r--r-- arch/arm/configs/multi_v7_defconfig | 1
-rw-r--r-- arch/arm/configs/sama5_defconfig | 1
-rw-r--r-- arch/arm/configs/sunxi_defconfig | 1
-rw-r--r-- arch/arm/include/asm/arch_gicv3.h | 1
-rw-r--r-- arch/arm/include/asm/irq.h | 5
-rw-r--r-- arch/arm/include/asm/kvm_emulate.h | 12
-rw-r--r-- arch/arm/include/asm/uaccess.h | 4
-rw-r--r-- arch/arm/include/uapi/asm/unistd.h | 1
-rw-r--r-- arch/arm/kernel/bios32.c | 19
-rw-r--r-- arch/arm/kernel/calls.S | 1
-rw-r--r-- arch/arm/kernel/process.c | 33
-rw-r--r-- arch/arm/kernel/swp_emulate.c | 6
-rw-r--r-- arch/arm/kernel/sys_oabi-compat.c | 73
-rw-r--r-- arch/arm/kvm/arm.c | 7
-rw-r--r-- arch/arm/kvm/mmio.c | 5
-rw-r--r-- arch/arm/kvm/mmu.c | 15
-rw-r--r-- arch/arm/kvm/psci.c | 20
-rw-r--r-- arch/arm/lib/uaccess_with_memcpy.c | 29
-rw-r--r-- arch/arm/mach-at91/Kconfig | 6
-rw-r--r-- arch/arm/mach-at91/pm.c | 7
-rw-r--r-- arch/arm/mach-dove/include/mach/entry-macro.S | 4
-rw-r--r-- arch/arm/mach-exynos/Kconfig | 1
-rw-r--r-- arch/arm/mach-exynos/pmu.c | 6
-rw-r--r-- arch/arm/mach-imx/gpc.c | 1
-rw-r--r-- arch/arm/mach-ixp4xx/include/mach/io.h | 12
-rw-r--r-- arch/arm/mach-omap2/Kconfig | 4
-rw-r--r-- arch/arm/mach-omap2/gpmc-onenand.c | 14
-rw-r--r-- arch/arm/mach-omap2/omap-smp.c | 6
-rw-r--r-- arch/arm/mach-omap2/omap_hwmod.c | 66
-rw-r--r-- arch/arm/mach-omap2/omap_hwmod.h | 3
-rw-r--r-- arch/arm/mach-omap2/omap_hwmod_7xx_data.c | 56
-rw-r--r-- arch/arm/mach-omap2/omap_hwmod_81xx_data.c | 3
-rw-r--r-- arch/arm/mach-omap2/pdata-quirks.c | 29
-rw-r--r-- arch/arm/mach-omap2/pm34xx.c | 4
-rw-r--r-- arch/arm/mach-omap2/timer.c | 6
-rw-r--r-- arch/arm/mach-orion5x/include/mach/entry-macro.S | 2
-rw-r--r-- arch/arm/mach-pxa/ezx.c | 5
-rw-r--r-- arch/arm/mach-pxa/palm27x.c | 2
-rw-r--r-- arch/arm/mach-pxa/palmtc.c | 2
-rw-r--r-- arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c | 2
-rw-r--r-- arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c | 2
-rw-r--r-- arch/arm/mach-shmobile/setup-r8a7793.c | 2
-rw-r--r-- arch/arm/mach-sti/Kconfig | 1
-rw-r--r-- arch/arm/mach-ux500/Kconfig | 1
-rw-r--r-- arch/arm/mach-zx/Kconfig | 2
-rw-r--r-- arch/arm/mm/context.c | 38
-rw-r--r-- arch/arm/mm/dma-mapping.c | 2
-rw-r--r-- arch/arm/mm/init.c | 92
-rw-r--r-- arch/arm/mm/proc-v7.S | 4
-rw-r--r-- arch/arm/net/bpf_jit_32.c | 21
-rw-r--r-- arch/arm64/Kconfig | 23
-rw-r--r-- arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi | 5
-rw-r--r-- arch/arm64/crypto/aes-ce-cipher.c | 2
-rw-r--r-- arch/arm64/include/asm/arch_gicv3.h | 1
-rw-r--r-- arch/arm64/include/asm/barrier.h | 16
-rw-r--r-- arch/arm64/include/asm/compat.h | 3
-rw-r--r-- arch/arm64/include/asm/cpufeature.h | 25
-rw-r--r-- arch/arm64/include/asm/dma-mapping.h | 13
-rw-r--r-- arch/arm64/include/asm/hw_breakpoint.h | 6
-rw-r--r-- arch/arm64/include/asm/irq.h | 5
-rw-r--r-- arch/arm64/include/asm/kvm_emulate.h | 18
-rw-r--r-- arch/arm64/include/asm/mmu_context.h | 2
-rw-r--r-- arch/arm64/include/asm/pgtable.h | 13
-rw-r--r-- arch/arm64/kernel/cpu_errata.c | 9
-rw-r--r-- arch/arm64/kernel/cpufeature.c | 37
-rw-r--r-- arch/arm64/kernel/cpuinfo.c | 5
-rw-r--r-- arch/arm64/kernel/efi.c | 45
-rw-r--r-- arch/arm64/kernel/suspend.c | 10
-rw-r--r-- arch/arm64/kernel/vmlinux.lds.S | 5
-rw-r--r-- arch/arm64/kvm/handle_exit.c | 2
-rw-r--r-- arch/arm64/kvm/hyp.S | 14
-rw-r--r-- arch/arm64/kvm/inject_fault.c | 2
-rw-r--r-- arch/arm64/kvm/sys_regs.c | 123
-rw-r--r-- arch/arm64/kvm/sys_regs.h | 8
-rw-r--r-- arch/arm64/kvm/sys_regs_generic_v8.c | 4
-rw-r--r-- arch/arm64/mm/context.c | 38
-rw-r--r-- arch/arm64/mm/dma-mapping.c | 35
-rw-r--r-- arch/arm64/mm/fault.c | 28
-rw-r--r-- arch/arm64/mm/mmu.c | 91
-rw-r--r-- arch/arm64/net/bpf_jit_comp.c | 85
-rw-r--r-- arch/blackfin/include/asm/cmpxchg.h | 1
-rw-r--r-- arch/blackfin/kernel/perf_event.c | 2
-rw-r--r-- arch/c6x/include/asm/cmpxchg.h | 2
-rw-r--r-- arch/frv/include/asm/cmpxchg.h | 2
-rw-r--r-- arch/h8300/Kconfig | 1
-rw-r--r-- arch/h8300/include/asm/io.h | 39
-rw-r--r-- arch/h8300/kernel/setup.c | 8
-rw-r--r-- arch/ia64/include/asm/barrier.h | 2
-rw-r--r-- arch/ia64/include/asm/unistd.h | 2
-rw-r--r-- arch/ia64/include/uapi/asm/unistd.h | 1
-rw-r--r-- arch/ia64/kernel/entry.S | 1
-rw-r--r-- arch/m32r/include/asm/Kbuild | 1
-rw-r--r-- arch/m32r/include/asm/io.h | 10
-rw-r--r-- arch/m68k/coldfire/m54xx.c | 2
-rw-r--r-- arch/m68k/include/asm/unistd.h | 2
-rw-r--r-- arch/m68k/include/uapi/asm/unistd.h | 1
-rw-r--r-- arch/m68k/kernel/setup_no.c | 9
-rw-r--r-- arch/m68k/kernel/syscalltable.S | 1
-rw-r--r-- arch/m68k/mm/motorola.c | 2
-rw-r--r-- arch/m68k/sun3/config.c | 4
-rw-r--r-- arch/microblaze/kernel/dma.c | 3
-rw-r--r-- arch/mips/ath79/setup.c | 7
-rw-r--r-- arch/mips/boot/dts/qca/ar9132.dtsi | 2
-rw-r--r-- arch/mips/include/asm/page.h | 3
-rw-r--r-- arch/mips/include/asm/uaccess.h | 52
-rw-r--r-- arch/mips/kernel/cps-vec.S | 2
-rw-r--r-- arch/mips/kernel/mips_ksyms.c | 2
-rw-r--r-- arch/mips/kvm/emulate.c | 2
-rw-r--r-- arch/mips/kvm/locore.S | 16
-rw-r--r-- arch/mips/kvm/mips.c | 5
-rw-r--r-- arch/mips/lib/memset.S | 2
-rw-r--r-- arch/mips/mm/dma-default.c | 2
-rw-r--r-- arch/mips/net/bpf_jit.c | 16
-rw-r--r-- arch/mips/pci/pci-rt2880.c | 5
-rw-r--r-- arch/mips/pmcs-msp71xx/msp_setup.c | 5
-rw-r--r-- arch/mips/sni/reset.c | 8
-rw-r--r-- arch/mips/vdso/Makefile | 4
-rw-r--r-- arch/mn10300/Kconfig | 4
-rw-r--r-- arch/nios2/mm/cacheflush.c | 24
-rw-r--r-- arch/parisc/Kconfig | 3
-rw-r--r-- arch/parisc/include/asm/hugetlb.h | 85
-rw-r--r-- arch/parisc/include/asm/page.h | 13
-rw-r--r-- arch/parisc/include/asm/pgalloc.h | 2
-rw-r--r-- arch/parisc/include/asm/pgtable.h | 27
-rw-r--r-- arch/parisc/include/asm/processor.h | 27
-rw-r--r-- arch/parisc/include/uapi/asm/mman.h | 10
-rw-r--r-- arch/parisc/include/uapi/asm/unistd.h | 3
-rw-r--r-- arch/parisc/kernel/asm-offsets.c | 8
-rw-r--r-- arch/parisc/kernel/entry.S | 56
-rw-r--r-- arch/parisc/kernel/head.S | 4
-rw-r--r-- arch/parisc/kernel/pci.c | 18
-rw-r--r-- arch/parisc/kernel/setup.c | 14
-rw-r--r-- arch/parisc/kernel/signal.c | 64
-rw-r--r-- arch/parisc/kernel/syscall.S | 4
-rw-r--r-- arch/parisc/kernel/syscall_table.S | 1
-rw-r--r-- arch/parisc/kernel/traps.c | 35
-rw-r--r-- arch/parisc/kernel/vmlinux.lds.S | 9
-rw-r--r-- arch/parisc/mm/Makefile | 1
-rw-r--r-- arch/parisc/mm/hugetlbpage.c | 161
-rw-r--r-- arch/parisc/mm/init.c | 40
-rw-r--r-- arch/powerpc/boot/dts/sbc8641d.dts | 8
-rw-r--r-- arch/powerpc/include/asm/barrier.h | 2
-rw-r--r-- arch/powerpc/include/asm/reg.h | 1
-rw-r--r-- arch/powerpc/include/asm/systbl.h | 25
-rw-r--r-- arch/powerpc/include/asm/unistd.h | 2
-rw-r--r-- arch/powerpc/include/uapi/asm/unistd.h | 13
-rw-r--r-- arch/powerpc/kernel/eeh_driver.c | 14
-rw-r--r-- arch/powerpc/kernel/process.c | 18
-rw-r--r-- arch/powerpc/kernel/signal_32.c | 14
-rw-r--r-- arch/powerpc/kernel/signal_64.c | 4
-rw-r--r-- arch/powerpc/kvm/book3s_hv.c | 6
-rw-r--r-- arch/powerpc/net/bpf_jit_comp.c | 13
-rw-r--r-- arch/powerpc/platforms/powernv/opal-irqchip.c | 64
-rw-r--r-- arch/powerpc/platforms/powernv/opal.c | 2
-rw-r--r-- arch/s390/include/asm/barrier.h | 2
-rw-r--r-- arch/s390/include/asm/cio.h | 1
-rw-r--r-- arch/s390/include/asm/elf.h | 13
-rw-r--r-- arch/s390/include/asm/ipl.h | 3
-rw-r--r-- arch/s390/include/asm/pci_dma.h | 4
-rw-r--r-- arch/s390/include/asm/trace/diag.h | 6
-rw-r--r-- arch/s390/include/uapi/asm/unistd.h | 19
-rw-r--r-- arch/s390/kernel/compat_wrapper.c | 1
-rw-r--r-- arch/s390/kernel/diag.c | 4
-rw-r--r-- arch/s390/kernel/dis.c | 17
-rw-r--r-- arch/s390/kernel/head.S | 95
-rw-r--r-- arch/s390/kernel/ipl.c | 65
-rw-r--r-- arch/s390/kernel/process.c | 6
-rw-r--r-- arch/s390/kernel/sclp.c | 2
-rw-r--r-- arch/s390/kernel/setup.c | 3
-rw-r--r-- arch/s390/kernel/syscalls.S | 1
-rw-r--r-- arch/s390/kernel/trace.c | 6
-rw-r--r-- arch/s390/kvm/interrupt.c | 7
-rw-r--r-- arch/s390/kvm/kvm-s390.c | 6
-rw-r--r-- arch/s390/kvm/priv.c | 2
-rw-r--r-- arch/s390/kvm/sigp.c | 8
-rw-r--r-- arch/s390/mm/init.c | 30
-rw-r--r-- arch/s390/mm/mmap.c | 60
-rw-r--r-- arch/s390/pci/pci_dma.c | 84
-rw-r--r-- arch/sh/include/uapi/asm/unistd_64.h | 2
-rw-r--r-- arch/sh/kernel/perf_event.c | 2
-rw-r--r-- arch/sparc/include/asm/elf_64.h | 1
-rw-r--r-- arch/sparc/include/uapi/asm/unistd.h | 7
-rw-r--r-- arch/sparc/kernel/head_64.S | 13
-rw-r--r-- arch/sparc/kernel/perf_event.c | 13
-rw-r--r-- arch/sparc/kernel/rtrap_64.S | 8
-rw-r--r-- arch/sparc/kernel/setup_64.c | 9
-rw-r--r-- arch/sparc/kernel/systbls_32.S | 19
-rw-r--r-- arch/sparc/kernel/systbls_64.S | 18
-rw-r--r-- arch/sparc/lib/NG2copy_from_user.S | 8
-rw-r--r-- arch/sparc/lib/NG2copy_to_user.S | 8
-rw-r--r-- arch/sparc/lib/NG2memcpy.S | 118
-rw-r--r-- arch/sparc/lib/NG4copy_from_user.S | 8
-rw-r--r-- arch/sparc/lib/NG4copy_to_user.S | 8
-rw-r--r-- arch/sparc/lib/NG4memcpy.S | 40
-rw-r--r-- arch/sparc/lib/U1copy_from_user.S | 8
-rw-r--r-- arch/sparc/lib/U1copy_to_user.S | 8
-rw-r--r-- arch/sparc/lib/U1memcpy.S | 48
-rw-r--r-- arch/sparc/lib/U3copy_from_user.S | 8
-rw-r--r-- arch/sparc/lib/U3copy_to_user.S | 8
-rw-r--r-- arch/sparc/lib/U3memcpy.S | 86
-rw-r--r-- arch/sparc/net/bpf_jit_comp.c | 17
-rw-r--r-- arch/tile/Kconfig | 11
-rw-r--r-- arch/tile/include/asm/cmpxchg.h | 2
-rw-r--r-- arch/tile/include/asm/page.h | 8
-rw-r--r-- arch/tile/kernel/perf_event.c | 2
-rw-r--r-- arch/um/Makefile | 2
-rw-r--r-- arch/um/drivers/net_user.c | 10
-rw-r--r-- arch/um/kernel/signal.c | 2
-rw-r--r-- arch/x86/Kconfig | 19
-rw-r--r-- arch/x86/Kconfig.debug | 2
-rw-r--r-- arch/x86/boot/boot.h | 1
-rw-r--r-- arch/x86/boot/video-mode.c | 2
-rw-r--r-- arch/x86/boot/video.c | 2
-rw-r--r-- arch/x86/crypto/chacha20_glue.c | 2
-rw-r--r-- arch/x86/crypto/crc32c-intel_glue.c | 2
-rw-r--r-- arch/x86/entry/calling.h | 15
-rw-r--r-- arch/x86/entry/common.c | 6
-rw-r--r-- arch/x86/entry/entry_32.S | 15
-rw-r--r-- arch/x86/entry/entry_64.S | 27
-rw-r--r-- arch/x86/entry/entry_64_compat.S | 40
-rw-r--r-- arch/x86/entry/vdso/vclock_gettime.c | 151
-rw-r--r-- arch/x86/entry/vdso/vdso-layout.lds.S | 3
-rw-r--r-- arch/x86/entry/vdso/vdso2c.c | 3
-rw-r--r-- arch/x86/entry/vdso/vdso32/system_call.S | 54
-rw-r--r-- arch/x86/entry/vdso/vma.c | 14
-rw-r--r-- arch/x86/include/asm/apic.h | 6
-rw-r--r-- arch/x86/include/asm/atomic.h | 1
-rw-r--r-- arch/x86/include/asm/atomic64_32.h | 1
-rw-r--r-- arch/x86/include/asm/calgary.h | 2
-rw-r--r-- arch/x86/include/asm/cmpxchg_32.h | 2
-rw-r--r-- arch/x86/include/asm/cmpxchg_64.h | 2
-rw-r--r-- arch/x86/include/asm/cpu.h | 3
-rw-r--r-- arch/x86/include/asm/cpufeature.h | 116
-rw-r--r-- arch/x86/include/asm/fixmap.h | 5
-rw-r--r-- arch/x86/include/asm/fpu/internal.h | 173
-rw-r--r-- arch/x86/include/asm/intel_pt.h | 10
-rw-r--r-- arch/x86/include/asm/ipi.h | 2
-rw-r--r-- arch/x86/include/asm/jump_label.h | 63
-rw-r--r-- arch/x86/include/asm/microcode.h | 39
-rw-r--r-- arch/x86/include/asm/msi.h | 6
-rw-r--r-- arch/x86/include/asm/msr-index.h | 4
-rw-r--r-- arch/x86/include/asm/msr-trace.h | 57
-rw-r--r-- arch/x86/include/asm/msr.h | 43
-rw-r--r-- arch/x86/include/asm/page_types.h | 22
-rw-r--r-- arch/x86/include/asm/paravirt.h | 44
-rw-r--r-- arch/x86/include/asm/paravirt_types.h | 40
-rw-r--r-- arch/x86/include/asm/pgtable.h | 15
-rw-r--r-- arch/x86/include/asm/pgtable_types.h | 14
-rw-r--r-- arch/x86/include/asm/processor.h | 1
-rw-r--r-- arch/x86/include/asm/pvclock.h | 14
-rw-r--r-- arch/x86/include/asm/qspinlock_paravirt.h | 59
-rw-r--r-- arch/x86/include/asm/reboot.h | 1
-rw-r--r-- arch/x86/include/asm/smp.h | 12
-rw-r--r-- arch/x86/include/asm/suspend_32.h | 1
-rw-r--r-- arch/x86/include/asm/suspend_64.h | 1
-rw-r--r-- arch/x86/include/asm/uaccess.h | 9
-rw-r--r-- arch/x86/include/asm/vdso.h | 1
-rw-r--r-- arch/x86/include/asm/x86_init.h | 3
-rw-r--r-- arch/x86/include/asm/xor_32.h | 2
-rw-r--r-- arch/x86/include/uapi/asm/mce.h | 2
-rw-r--r-- arch/x86/kernel/apic/apic.c | 45
-rw-r--r-- arch/x86/kernel/apic/apic_flat_64.c | 19
-rw-r--r-- arch/x86/kernel/apic/apic_noop.c | 2
-rw-r--r-- arch/x86/kernel/apic/apic_numachip.c | 7
-rw-r--r-- arch/x86/kernel/apic/bigsmp_32.c | 10
-rw-r--r-- arch/x86/kernel/apic/ipi.c | 18
-rw-r--r-- arch/x86/kernel/apic/msi.c | 8
-rw-r--r-- arch/x86/kernel/apic/probe_32.c | 1
-rw-r--r-- arch/x86/kernel/apic/vector.c | 2
-rw-r--r-- arch/x86/kernel/apic/x2apic_cluster.c | 9
-rw-r--r-- arch/x86/kernel/apic/x2apic_phys.c | 9
-rw-r--r-- arch/x86/kernel/apic/x2apic_uv_x.c | 1
-rw-r--r-- arch/x86/kernel/asm-offsets.c | 3
-rw-r--r-- arch/x86/kernel/asm-offsets_64.c | 1
-rw-r--r-- arch/x86/kernel/cpu/amd.c | 8
-rw-r--r-- arch/x86/kernel/cpu/centaur.c | 2
-rw-r--r-- arch/x86/kernel/cpu/common.c | 68
-rw-r--r-- arch/x86/kernel/cpu/intel.c | 3
-rw-r--r-- arch/x86/kernel/cpu/intel_cacheinfo.c | 6
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c | 93
-rw-r--r-- arch/x86/kernel/cpu/microcode/core.c | 13
-rw-r--r-- arch/x86/kernel/cpu/microcode/intel.c | 16
-rw-r--r-- arch/x86/kernel/cpu/mtrr/cleanup.c | 11
-rw-r--r-- arch/x86/kernel/cpu/mtrr/generic.c | 2
-rw-r--r-- arch/x86/kernel/cpu/mtrr/main.c | 2
-rw-r--r-- arch/x86/kernel/cpu/perf_event.c | 38
-rw-r--r-- arch/x86/kernel/cpu/perf_event.h | 26
-rw-r--r-- arch/x86/kernel/cpu/perf_event_amd.c | 6
-rw-r--r-- arch/x86/kernel/cpu/perf_event_amd_uncore.c | 11
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel.c | 117
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_cqm.c | 2
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_ds.c | 39
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_lbr.c | 46
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_pt.c | 9
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_rapl.c | 25
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 17
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_uncore.h | 3
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 2
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c | 635
-rw-r--r-- arch/x86/kernel/cpu/rdrand.c | 25
-rw-r--r-- arch/x86/kernel/cpu/scattered.c | 20
-rw-r--r-- arch/x86/kernel/cpu/transmeta.c | 4
-rw-r--r-- arch/x86/kernel/crash.c | 11
-rw-r--r-- arch/x86/kernel/fpu/init.c | 21
-rw-r--r-- arch/x86/kernel/fpu/signal.c | 11
-rw-r--r-- arch/x86/kernel/fpu/xstate.c | 5
-rw-r--r-- arch/x86/kernel/hw_breakpoint.c | 6
-rw-r--r-- arch/x86/kernel/irq_work.c | 2
-rw-r--r-- arch/x86/kernel/kvmclock.c | 11
-rw-r--r-- arch/x86/kernel/mcount_64.S | 6
-rw-r--r-- arch/x86/kernel/nmi.c | 32
-rw-r--r-- arch/x86/kernel/paravirt.c | 36
-rw-r--r-- arch/x86/kernel/paravirt_patch_32.c | 2
-rw-r--r-- arch/x86/kernel/paravirt_patch_64.c | 3
-rw-r--r-- arch/x86/kernel/pci-calgary_64.c | 4
-rw-r--r-- arch/x86/kernel/pci-swiotlb.c | 2
-rw-r--r-- arch/x86/kernel/pmem.c | 12
-rw-r--r-- arch/x86/kernel/process_64.c | 2
-rw-r--r-- arch/x86/kernel/ptrace.c | 15
-rw-r--r-- arch/x86/kernel/pvclock.c | 24
-rw-r--r-- arch/x86/kernel/reboot.c | 30
-rw-r--r-- arch/x86/kernel/rtc.c | 3
-rw-r--r-- arch/x86/kernel/setup.c | 4
-rw-r--r-- arch/x86/kernel/signal.c | 17
-rw-r--r-- arch/x86/kernel/smp.c | 4
-rw-r--r-- arch/x86/kernel/smpboot.c | 18
-rw-r--r-- arch/x86/kernel/tsc.c | 2
-rw-r--r-- arch/x86/kernel/vm86_32.c | 4
-rw-r--r-- arch/x86/kernel/x86_init.c | 1
-rw-r--r-- arch/x86/kvm/cpuid.h | 42
-rw-r--r-- arch/x86/kvm/i8254.c | 1
-rw-r--r-- arch/x86/kvm/mtrr.c | 25
-rw-r--r-- arch/x86/kvm/svm.c | 21
-rw-r--r-- arch/x86/kvm/vmx.c | 12
-rw-r--r-- arch/x86/kvm/x86.c | 74
-rw-r--r-- arch/x86/lguest/boot.c | 2
-rw-r--r-- arch/x86/lib/Makefile | 2
-rw-r--r-- arch/x86/lib/cpu.c | 35
-rw-r--r-- arch/x86/lib/msr.c | 26
-rw-r--r-- arch/x86/mm/Makefile | 1
-rw-r--r-- arch/x86/mm/debug_pagetables.c | 46
-rw-r--r-- arch/x86/mm/dump_pagetables.c | 36
-rw-r--r-- arch/x86/mm/ioremap.c | 4
-rw-r--r-- arch/x86/mm/mpx.c | 53
-rw-r--r-- arch/x86/mm/pageattr.c | 10
-rw-r--r-- arch/x86/mm/pat.c | 12
-rw-r--r-- arch/x86/mm/pat_rbtree.c | 52
-rw-r--r-- arch/x86/mm/pgtable.c | 7
-rw-r--r-- arch/x86/mm/setup_nx.c | 4
-rw-r--r-- arch/x86/mm/srat.c | 2
-rw-r--r-- arch/x86/pci/bus_numa.c | 13
-rw-r--r-- arch/x86/platform/uv/uv_nmi.c | 1
-rw-r--r-- arch/x86/power/cpu.c | 92
-rw-r--r-- arch/x86/um/signal.c | 18
-rw-r--r-- arch/x86/xen/enlighten.c | 18
-rw-r--r-- arch/x86/xen/mmu.c | 10
-rw-r--r-- arch/x86/xen/suspend.c | 21
-rw-r--r-- arch/x86/xen/xen-asm_32.S | 14
-rw-r--r-- arch/x86/xen/xen-asm_64.S | 19
-rw-r--r-- arch/x86/xen/xen-ops.h | 3
-rw-r--r-- block/blk-cgroup.c | 6
-rw-r--r-- block/blk-core.c | 53
-rw-r--r-- block/blk-merge.c | 35
-rw-r--r-- block/blk-mq.c | 14
-rw-r--r-- block/blk-settings.c | 36
-rw-r--r-- block/blk-sysfs.c | 3
-rw-r--r-- block/blk-timeout.c | 8
-rw-r--r-- block/blk.h | 2
-rw-r--r-- block/noop-iosched.c | 10
-rw-r--r-- block/partition-generic.c | 2
-rw-r--r-- block/partitions/mac.c | 10
-rw-r--r-- crypto/ablkcipher.c | 2
-rw-r--r-- crypto/algif_aead.c | 4
-rw-r--r-- crypto/algif_skcipher.c | 67
-rw-r--r-- crypto/async_tx/async_memcpy.c | 2
-rw-r--r-- crypto/async_tx/async_pq.c | 4
-rw-r--r-- crypto/async_tx/async_raid6_recov.c | 4
-rw-r--r-- crypto/async_tx/async_xor.c | 4
-rw-r--r-- crypto/blkcipher.c | 2
-rw-r--r-- drivers/Makefile | 2
-rw-r--r-- drivers/acpi/Kconfig | 4
-rw-r--r-- drivers/acpi/cppc_acpi.c | 2
-rw-r--r-- drivers/acpi/device_sysfs.c | 2
-rw-r--r-- drivers/acpi/ec.c | 2
-rw-r--r-- drivers/acpi/nfit.c | 67
-rw-r--r-- drivers/acpi/nfit.h | 3
-rw-r--r-- drivers/acpi/pci_root.c | 7
-rw-r--r-- drivers/acpi/processor_driver.c | 3
-rw-r--r-- drivers/acpi/sbshc.c | 48
-rw-r--r-- drivers/ata/ahci.c | 22
-rw-r--r-- drivers/ata/ahci_mvebu.c | 5
-rw-r--r-- drivers/ata/libahci.c | 9
-rw-r--r-- drivers/ata/libata-eh.c | 8
-rw-r--r-- drivers/ata/sata_fsl.c | 3
-rw-r--r-- drivers/ata/sata_sil.c | 3
-rw-r--r-- drivers/base/memory.c | 4
-rw-r--r-- drivers/base/platform-msi.c | 256
-rw-r--r-- drivers/base/power/domain.c | 36
-rw-r--r-- drivers/base/power/domain_governor.c | 3
-rw-r--r-- drivers/base/power/wakeirq.c | 6
-rw-r--r-- drivers/block/mtip32xx/mtip32xx.c | 6
-rw-r--r-- drivers/block/null_blk.c | 314
-rw-r--r-- drivers/block/rbd.c | 1
-rw-r--r-- drivers/block/xen-blkback/blkback.c | 15
-rw-r--r-- drivers/block/xen-blkback/common.h | 8
-rw-r--r-- drivers/bus/omap-ocp2scp.c | 2
-rw-r--r-- drivers/bus/sunxi-rsb.c | 8
-rw-r--r-- drivers/char/hw_random/via-rng.c | 5
-rw-r--r-- drivers/char/ipmi/ipmi_si_intf.c | 90
-rw-r--r-- drivers/char/ipmi/ipmi_watchdog.c | 8
-rw-r--r-- drivers/clk/clk-gpio.c | 33
-rw-r--r-- drivers/clk/clk-qoriq.c | 4
-rw-r--r-- drivers/clk/clk-scpi.c | 1
-rw-r--r-- drivers/clk/imx/clk-pllv1.c | 14
-rw-r--r-- drivers/clk/imx/clk-pllv2.c | 9
-rw-r--r-- drivers/clk/imx/clk-vf610.c | 8
-rw-r--r-- drivers/clk/mmp/clk-mmp2.c | 1
-rw-r--r-- drivers/clk/mmp/clk-pxa168.c | 1
-rw-r--r-- drivers/clk/mmp/clk-pxa910.c | 1
-rw-r--r-- drivers/clk/sunxi/clk-a10-pll2.c | 23
-rw-r--r-- drivers/clk/ti/clk-816x.c | 2
-rw-r--r-- drivers/clk/ti/clkt_dpll.c | 4
-rw-r--r-- drivers/clk/ti/divider.c | 16
-rw-r--r-- drivers/clk/ti/fapll.c | 4
-rw-r--r-- drivers/clk/ti/mux.c | 15
-rw-r--r-- drivers/clocksource/Kconfig | 133
-rw-r--r-- drivers/clocksource/Makefile | 2
-rw-r--r-- drivers/clocksource/acpi_pm.c | 27
-rw-r--r-- drivers/clocksource/arm_global_timer.c | 21
-rw-r--r-- drivers/clocksource/dw_apb_timer.c | 46
-rw-r--r-- drivers/clocksource/dw_apb_timer_of.c | 16
-rw-r--r-- drivers/clocksource/fsl_ftm_timer.c | 4
-rw-r--r-- drivers/clocksource/h8300_timer16.c | 222
-rw-r--r-- drivers/clocksource/h8300_timer8.c | 264
-rw-r--r-- drivers/clocksource/h8300_tpu.c | 159
-rw-r--r-- drivers/clocksource/mmio.c | 2
-rw-r--r-- drivers/clocksource/mtk_timer.c | 20
-rw-r--r-- drivers/clocksource/rockchip_timer.c | 23
-rw-r--r-- drivers/clocksource/tango_xtal.c | 18
-rw-r--r-- drivers/clocksource/tegra20_timer.c | 3
-rw-r--r-- drivers/clocksource/time-lpc32xx.c | 4
-rw-r--r-- drivers/clocksource/time-pistachio.c | 2
-rw-r--r-- drivers/clocksource/timer-sun5i.c | 16
-rw-r--r-- drivers/clocksource/vt8500_timer.c | 1
-rw-r--r-- drivers/connector/connector.c | 11
-rw-r--r-- drivers/cpufreq/Kconfig.arm | 5
-rw-r--r-- drivers/cpufreq/Kconfig.x86 | 1
-rw-r--r-- drivers/cpufreq/cppc_cpufreq.c | 3
-rw-r--r-- drivers/cpufreq/cpufreq.c | 21
-rw-r--r-- drivers/cpufreq/intel_pstate.c | 322
-rw-r--r-- drivers/cpufreq/s3c24xx-cpufreq.c | 2
-rw-r--r-- drivers/cpufreq/scpi-cpufreq.c | 2
-rw-r--r-- drivers/crypto/nx/nx-aes-ccm.c | 2
-rw-r--r-- drivers/crypto/nx/nx-aes-gcm.c | 3
-rw-r--r-- drivers/crypto/padlock-aes.c | 2
-rw-r--r-- drivers/crypto/padlock-sha.c | 2
-rw-r--r-- drivers/crypto/qat/qat_common/adf_ctl_drv.c | 2
-rw-r--r-- drivers/crypto/talitos.c | 2
-rw-r--r-- drivers/dma/at_hdmac.c | 20
-rw-r--r-- drivers/dma/at_hdmac_regs.h | 6
-rw-r--r-- drivers/dma/at_xdmac.c | 29
-rw-r--r-- drivers/dma/bcm2835-dma.c | 78
-rw-r--r-- drivers/dma/edma.c | 57
-rw-r--r-- drivers/dma/imx-sdma.c | 2
-rw-r--r-- drivers/dma/sh/usb-dmac.c | 11
-rw-r--r-- drivers/dma/xgene-dma.c | 4
-rw-r--r-- drivers/firmware/dmi_scan.c | 6
-rw-r--r-- drivers/fpga/fpga-mgr.c | 13
-rw-r--r-- drivers/gpio/gpio-74xx-mmio.c | 7
-rw-r--r-- drivers/gpio/gpio-ath79.c | 2
-rw-r--r-- drivers/gpio/gpio-generic.c | 4
-rw-r--r-- drivers/gpio/gpio-omap.c | 2
-rw-r--r-- drivers/gpio/gpio-palmas.c | 2
-rw-r--r-- drivers/gpio/gpio-syscon.c | 6
-rw-r--r-- drivers/gpio/gpio-tegra.c | 105
-rw-r--r-- drivers/gpio/gpiolib.c | 10
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 129
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 228
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 108
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 101
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 51
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 48
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 1
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 7
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 30
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 10
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 94
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 23
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 11
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 172
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 8
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 5
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 302
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 13
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 11
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 24
-rw-r--r-- drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h | 24
-rw-r--r-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 151
-rw-r--r-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 11
-rw-r--r-- drivers/gpu/drm/amd/scheduler/sched_fence.c | 23
-rw-r--r-- drivers/gpu/drm/drm_atomic.c | 61
-rw-r--r-- drivers/gpu/drm/drm_atomic_helper.c | 29
-rw-r--r-- drivers/gpu/drm/drm_drv.c | 5
-rw-r--r-- drivers/gpu/drm/drm_fb_helper.c | 51
-rw-r--r-- drivers/gpu/drm/drm_fops.c | 84
-rw-r--r-- drivers/gpu/drm/drm_irq.c | 54
-rw-r--r-- drivers/gpu/drm/drm_probe_helper.c | 3
-rw-r--r-- drivers/gpu/drm/exynos/exynos_drm_crtc.c | 3
-rw-r--r-- drivers/gpu/drm/i915/i915_debugfs.c | 2
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h | 33
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c | 131
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_context.c | 2
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_fence.c | 36
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_gtt.c | 1
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_stolen.c | 1
-rw-r--r-- drivers/gpu/drm/i915/i915_params.c | 5
-rw-r--r-- drivers/gpu/drm/i915/intel_crt.c | 31
-rw-r--r-- drivers/gpu/drm/i915/intel_ddi.c | 75
-rw-r--r-- drivers/gpu/drm/i915/intel_display.c | 172
-rw-r--r-- drivers/gpu/drm/i915/intel_dp.c | 51
-rw-r--r-- drivers/gpu/drm/i915/intel_drv.h | 5
-rw-r--r-- drivers/gpu/drm/i915/intel_hdmi.c | 19
-rw-r--r-- drivers/gpu/drm/i915/intel_i2c.c | 6
-rw-r--r-- drivers/gpu/drm/i915/intel_pm.c | 15
-rw-r--r-- drivers/gpu/drm/i915/intel_runtime_pm.c | 34
-rw-r--r-- drivers/gpu/drm/imx/imx-drm-core.c | 7
-rw-r--r-- drivers/gpu/drm/imx/imx-drm.h | 3
-rw-r--r-- drivers/gpu/drm/imx/imx-tve.c | 1
-rw-r--r-- drivers/gpu/drm/imx/ipuv3-crtc.c | 63
-rw-r--r-- drivers/gpu/drm/imx/ipuv3-plane.c | 9
-rw-r--r-- drivers/gpu/drm/imx/ipuv3-plane.h | 2
-rw-r--r-- drivers/gpu/drm/imx/parallel-display.c | 4
-rw-r--r-- drivers/gpu/drm/mgag200/mgag200_cursor.c | 11
-rw-r--r-- drivers/gpu/drm/nouveau/include/nvkm/core/device.h | 1
-rw-r--r-- drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h | 1
-rw-r--r-- drivers/gpu/drm/nouveau/nouveau_acpi.c | 1
-rw-r--r-- drivers/gpu/drm/nouveau/nouveau_display.c | 19
-rw-r--r-- drivers/gpu/drm/nouveau/nouveau_drm.h | 4
-rw-r--r-- drivers/gpu/drm/nouveau/nouveau_usif.c | 5
-rw-r--r-- drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c | 19
-rw-r--r-- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c | 2
-rw-r--r-- drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpc.fuc | 8
-rw-r--r-- drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h | 344
-rw-r--r-- drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h | 344
-rw-r--r-- drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h | 344
-rw-r--r-- drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h | 308
-rw-r--r-- drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h | 474
-rw-r--r-- drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 6
-rw-r--r-- drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h | 1
-rw-r--r-- drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c | 1
-rw-r--r-- drivers/gpu/drm/nouveau/nvkm/subdev/bios/fan.c | 1
-rw-r--r-- drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c | 5
-rw-r--r-- drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c | 4
-rw-r--r-- drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c | 2
-rw-r--r-- drivers/gpu/drm/omapdrm/omap_fbdev.c | 5
-rw-r--r-- drivers/gpu/drm/radeon/cik.c | 11
-rw-r--r-- drivers/gpu/drm/radeon/evergreen.c | 5
-rw-r--r-- drivers/gpu/drm/radeon/r100.c | 12
-rw-r--r-- drivers/gpu/drm/radeon/r600.c | 2
-rw-r--r-- drivers/gpu/drm/radeon/radeon.h | 2
-rw-r--r-- drivers/gpu/drm/radeon/radeon_agp.c | 3
-rw-r--r-- drivers/gpu/drm/radeon/radeon_connectors.c | 21
-rw-r--r-- drivers/gpu/drm/radeon/radeon_display.c | 106
-rw-r--r-- drivers/gpu/drm/radeon/radeon_irq_kms.c | 8
-rw-r--r-- drivers/gpu/drm/radeon/radeon_kms.c | 50
-rw-r--r-- drivers/gpu/drm/radeon/radeon_mode.h | 5
-rw-r--r-- drivers/gpu/drm/radeon/radeon_object.c | 15
-rw-r--r-- drivers/gpu/drm/radeon/radeon_pm.c | 7
-rw-r--r-- drivers/gpu/drm/radeon/radeon_vce.c | 100
-rw-r--r-- drivers/gpu/drm/radeon/rs600.c | 2
-rw-r--r-- drivers/gpu/drm/radeon/rs690.c | 10
-rw-r--r-- drivers/gpu/drm/radeon/rv730_dpm.c | 2
-rw-r--r-- drivers/gpu/drm/radeon/rv770_dpm.c | 4
-rw-r--r-- drivers/gpu/drm/radeon/si.c | 5
-rw-r--r-- drivers/gpu/drm/radeon/si_dpm.c | 2
-rw-r--r-- drivers/gpu/drm/rockchip/rockchip_drm_gem.c | 1
-rw-r--r-- drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 43
-rw-r--r-- drivers/gpu/drm/ttm/ttm_lock.c | 2
-rw-r--r-- drivers/gpu/drm/vc4/vc4_crtc.c | 9
-rw-r--r-- drivers/gpu/drm/vc4/vc4_drv.c | 1
-rw-r--r-- drivers/gpu/drm/vc4/vc4_hvs.c | 8
-rw-r--r-- drivers/gpu/drm/vc4/vc4_plane.c | 18
-rw-r--r-- drivers/gpu/drm/virtio/virtgpu_display.c | 2
-rw-r--r-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 1
-rw-r--r-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 1
-rw-r--r-- drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 2
-rw-r--r-- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 64
-rw-r--r-- drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 7
-rw-r--r-- drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 2
-rw-r--r-- drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 2
-rw-r--r-- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 2
-rw-r--r-- drivers/gpu/ipu-v3/ipu-common.c | 69
-rw-r--r-- drivers/gpu/vga/vgaarb.c | 6
-rw-r--r-- drivers/hid/hid-ids.h | 6
-rw-r--r-- drivers/hid/hid-lg.c | 5
-rw-r--r-- drivers/hid/usbhid/hid-quirks.c | 10
-rw-r--r-- drivers/hid/wacom_wac.c | 5
-rw-r--r-- drivers/hwmon/Kconfig | 3
-rw-r--r-- drivers/hwmon/applesmc.c | 2
-rw-r--r-- drivers/hwmon/scpi-hwmon.c | 21
-rw-r--r-- drivers/hwmon/tmp102.c | 16
-rw-r--r-- drivers/i2c/busses/Kconfig | 1
-rw-r--r-- drivers/i2c/busses/i2c-davinci.c | 11
-rw-r--r-- drivers/i2c/busses/i2c-designware-core.c | 6
-rw-r--r-- drivers/i2c/busses/i2c-designware-core.h | 1
-rw-r--r-- drivers/i2c/busses/i2c-designware-platdrv.c | 16
-rw-r--r-- drivers/i2c/busses/i2c-i801.c | 6
-rw-r--r-- drivers/i2c/busses/i2c-imx.c | 5
-rw-r--r-- drivers/i2c/busses/i2c-mv64xxx.c | 27
-rw-r--r-- drivers/i2c/busses/i2c-rcar.c | 4
-rw-r--r-- drivers/i2c/busses/i2c-rk3x.c | 2
-rw-r--r-- drivers/i2c/busses/i2c-st.c | 2
-rw-r--r-- drivers/i2c/busses/i2c-xiic.c | 4
-rw-r--r-- drivers/i2c/i2c-core.c | 2
-rw-r--r-- drivers/iio/adc/ad7793.c | 2
-rw-r--r-- drivers/iio/adc/qcom-spmi-vadc.c | 4
-rw-r--r-- drivers/iio/adc/vf610_adc.c | 22
-rw-r--r-- drivers/iio/adc/xilinx-xadc-core.c | 1
-rw-r--r-- drivers/iio/dac/ad5064.c | 91
-rw-r--r-- drivers/iio/humidity/si7020.c | 8
-rw-r--r-- drivers/iio/industrialio-buffer.c | 2
-rw-r--r-- drivers/iio/industrialio-core.c | 2
-rw-r--r-- drivers/iio/light/apds9960.c | 1
-rw-r--r-- drivers/iio/proximity/pulsedlight-lidar-lite-v2.c | 6
-rw-r--r-- drivers/infiniband/core/cma.c | 21
-rw-r--r-- drivers/infiniband/core/mad.c | 5
-rw-r--r-- drivers/infiniband/core/sa_query.c | 32
-rw-r--r-- drivers/infiniband/core/uverbs_cmd.c | 27
-rw-r--r-- drivers/infiniband/core/verbs.c | 43
-rw-r--r-- drivers/infiniband/hw/mlx4/main.c | 2
-rw-r--r-- drivers/infiniband/hw/mlx4/qp.c | 19
-rw-r--r-- drivers/infiniband/hw/mlx4/srq.c | 13
-rw-r--r-- drivers/infiniband/hw/mlx5/mr.c | 14
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma.h | 10
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 49
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 4
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 57
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 49
-rw-r--r-- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2
-rw-r--r-- drivers/infiniband/hw/qib/qib_qsfp.c | 4
-rw-r--r-- drivers/infiniband/hw/qib/qib_verbs.h | 2
-rw-r--r-- drivers/infiniband/ulp/iser/iser_verbs.c | 2
-rw-r--r-- drivers/infiniband/ulp/isert/ib_isert.c | 13
-rw-r--r-- drivers/infiniband/ulp/srp/ib_srp.c | 48
-rw-r--r-- drivers/infiniband/ulp/srp/ib_srp.h | 5
-rw-r--r-- drivers/input/joystick/db9.c | 1
-rw-r--r-- drivers/input/joystick/gamecon.c | 1
-rw-r--r-- drivers/input/joystick/turbografx.c | 1
-rw-r--r-- drivers/input/joystick/walkera0701.c | 1
-rw-r--r-- drivers/input/misc/arizona-haptics.c | 3
-rw-r--r-- drivers/input/mouse/elan_i2c_core.c | 3
-rw-r--r-- drivers/input/serio/parkbd.c | 1
-rw-r--r-- drivers/input/tablet/aiptek.c | 9
-rw-r--r-- drivers/input/touchscreen/atmel_mxt_ts.c | 34
-rw-r--r-- drivers/input/touchscreen/elants_i2c.c | 21
-rw-r--r-- drivers/iommu/amd_iommu_v2.c | 20
-rw-r--r-- drivers/iommu/dma-iommu.c | 11
-rw-r--r-- drivers/iommu/intel-iommu.c | 4
-rw-r--r-- drivers/iommu/intel-svm.c | 20
-rw-r--r-- drivers/iommu/intel_irq_remapping.c | 2
-rw-r--r-- drivers/iommu/iommu.c | 2
-rw-r--r-- drivers/iommu/ipmmu-vmsa.c | 2
-rw-r--r-- drivers/iommu/s390-iommu.c | 23
-rw-r--r-- drivers/irqchip/Kconfig | 19
-rw-r--r-- drivers/irqchip/Makefile | 3
-rw-r--r-- drivers/irqchip/irq-bcm2836.c | 55
-rw-r--r-- drivers/irqchip/irq-gic-common.c | 13
-rw-r--r-- drivers/irqchip/irq-gic-realview.c | 43
-rw-r--r-- drivers/irqchip/irq-gic-v2m.c | 165
-rw-r--r-- drivers/irqchip/irq-gic.c | 102
-rw-r--r-- drivers/irqchip/irq-mbigen.c | 297
-rw-r--r-- drivers/irqchip/irq-omap-intc.c | 28
-rw-r--r-- drivers/irqchip/irq-renesas-h8300h.c | 8
-rw-r--r-- drivers/irqchip/irq-renesas-intc-irqpin.c | 96
-rw-r--r-- drivers/irqchip/irq-sunxi-nmi.c | 13
-rw-r--r-- drivers/irqchip/irq-ts4800.c | 163
-rw-r--r-- drivers/irqchip/irq-versatile-fpga.c | 5
-rw-r--r-- drivers/irqchip/irq-zevio.c | 3
-rw-r--r-- drivers/isdn/gigaset/ser-gigaset.c | 23
-rw-r--r-- drivers/isdn/hardware/mISDN/mISDNipac.c | 7
-rw-r--r-- drivers/isdn/hisax/config.c | 2
-rw-r--r-- drivers/isdn/hisax/hfc_pci.c | 2
-rw-r--r-- drivers/isdn/hisax/hfc_sx.c | 2
-rw-r--r-- drivers/isdn/hisax/q931.c | 6
-rw-r--r-- drivers/lightnvm/Kconfig | 1
-rw-r--r-- drivers/lightnvm/core.c | 158
-rw-r--r-- drivers/lightnvm/gennvm.c | 105
-rw-r--r-- drivers/lightnvm/gennvm.h | 2
-rw-r--r-- drivers/lightnvm/rrpc.c | 57
-rw-r--r-- drivers/md/dm-crypt.c | 22
-rw-r--r-- drivers/md/dm-mpath.c | 30
-rw-r--r-- drivers/md/dm-thin-metadata.c | 34
-rw-r--r-- drivers/md/dm-thin.c | 6
-rw-r--r-- drivers/md/dm.c | 7
-rw-r--r-- drivers/md/md.c | 33
-rw-r--r-- drivers/md/md.h | 8
-rw-r--r-- drivers/md/persistent-data/dm-btree.c | 101
-rw-r--r-- drivers/md/persistent-data/dm-btree.h | 14
-rw-r--r-- drivers/md/persistent-data/dm-space-map-metadata.c | 32
-rw-r--r-- drivers/md/raid10.c | 4
-rw-r--r-- drivers/media/pci/cx23885/cx23885-core.c | 4
-rw-r--r-- drivers/media/pci/cx25821/cx25821-core.c | 3
-rw-r--r-- drivers/media/pci/cx88/cx88-alsa.c | 4
-rw-r--r-- drivers/media/pci/cx88/cx88-mpeg.c | 3
-rw-r--r-- drivers/media/pci/cx88/cx88-video.c | 4
-rw-r--r-- drivers/media/pci/ivtv/ivtv-driver.c | 4
-rw-r--r-- drivers/media/pci/netup_unidvb/netup_unidvb_core.c | 2
-rw-r--r-- drivers/media/pci/saa7134/saa7134-core.c | 4
-rw-r--r-- drivers/media/pci/saa7164/saa7164-core.c | 4
-rw-r--r-- drivers/media/pci/tw68/tw68-core.c | 4
-rw-r--r-- drivers/media/usb/airspy/airspy.c | 2
-rw-r--r-- drivers/media/usb/hackrf/hackrf.c | 13
-rw-r--r-- drivers/memory/fsl_ifc.c | 1
-rw-r--r-- drivers/misc/cxl/native.c | 2
-rw-r--r-- drivers/mmc/card/block.c | 11
-rw-r--r-- drivers/mmc/core/mmc.c | 93
-rw-r--r-- drivers/mmc/host/Kconfig | 1
-rw-r--r-- drivers/mmc/host/mtk-sd.c | 2
-rw-r--r-- drivers/mmc/host/pxamci.c | 2
-rw-r--r-- drivers/mtd/mtdcore.c | 26
-rw-r--r-- drivers/mtd/nand/jz4740_nand.c | 1
-rw-r--r-- drivers/mtd/nand/nand_base.c | 2
-rw-r--r-- drivers/mtd/ofpart.c | 12
-rw-r--r-- drivers/mtd/spi-nor/spi-nor.c | 10
-rw-r--r-- drivers/mtd/ubi/debug.c | 2
-rw-r--r-- drivers/mtd/ubi/io.c | 2
-rw-r--r-- drivers/mtd/ubi/wl.c | 53
-rw-r--r-- drivers/net/can/bfin_can.c | 2
-rw-r--r-- drivers/net/can/c_can/c_can.c | 7
-rw-r--r-- drivers/net/can/cc770/cc770.c | 2
-rw-r--r-- drivers/net/can/flexcan.c | 4
-rw-r--r-- drivers/net/can/janz-ican3.c | 1
-rw-r--r-- drivers/net/can/m_can/m_can.c | 7
-rw-r--r-- drivers/net/can/pch_can.c | 3
-rw-r--r-- drivers/net/can/rcar_can.c | 11
-rw-r--r-- drivers/net/can/sja1000/sja1000.c | 4
-rw-r--r-- drivers/net/can/sun4i_can.c | 1
-rw-r--r-- drivers/net/can/ti_hecc.c | 7
-rw-r--r-- drivers/net/can/usb/ems_usb.c | 1
-rw-r--r-- drivers/net/can/usb/esd_usb2.c | 1
-rw-r--r-- drivers/net/can/usb/kvaser_usb.c | 5
-rw-r--r-- drivers/net/can/usb/usb_8dev.c | 4
-rw-r--r-- drivers/net/can/xilinx_can.c | 9
-rw-r--r-- drivers/net/dsa/mv88e6060.c | 114
-rw-r--r-- drivers/net/dsa/mv88e6060.h | 111
-rw-r--r-- drivers/net/ethernet/Kconfig | 2
-rw-r--r-- drivers/net/ethernet/Makefile | 2
-rw-r--r-- drivers/net/ethernet/amd/pcnet32.c | 5
-rw-r--r-- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 4
-rw-r--r-- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 69
-rw-r--r-- drivers/net/ethernet/apm/xgene/xgene_enet_main.h | 4
-rw-r--r-- drivers/net/ethernet/atheros/alx/main.c | 2
-rw-r--r-- drivers/net/ethernet/atheros/alx/reg.h | 1
-rw-r--r-- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 7
-rw-r--r-- drivers/net/ethernet/aurora/Kconfig | 21
-rw-r--r-- drivers/net/ethernet/aurora/Makefile | 1
-rw-r--r-- drivers/net/ethernet/aurora/nb8800.c | 1552
-rw-r--r-- drivers/net/ethernet/aurora/nb8800.h | 316
-rw-r--r-- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 22
-rw-r--r-- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 6
-rw-r--r-- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 90
-rw-r--r-- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 6
-rw-r--r-- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 9
-rw-r--r-- drivers/net/ethernet/cadence/macb.c | 4
-rw-r--r-- drivers/net/ethernet/cadence/macb.h | 5
-rw-r--r-- drivers/net/ethernet/cavium/liquidio/lio_main.c | 2
-rw-r--r-- drivers/net/ethernet/cavium/thunder/nic.h | 5
-rw-r--r-- drivers/net/ethernet/cavium/thunder/nic_main.c | 22
-rw-r--r-- drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c | 16
-rw-r--r-- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 14
-rw-r--r-- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 2
-rw-r--r-- drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 2
-rw-r--r-- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 28
-rw-r--r-- drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 2
-rw-r--r-- drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c | 4
-rw-r--r-- drivers/net/ethernet/dec/tulip/tulip_core.c | 9
-rw-r--r-- drivers/net/ethernet/dec/tulip/winbond-840.c | 2
-rw-r--r-- drivers/net/ethernet/dlink/Kconfig | 5
-rw-r--r-- drivers/net/ethernet/dlink/dl2k.c | 55
-rw-r--r-- drivers/net/ethernet/dlink/dl2k.h | 15
-rw-r--r-- drivers/net/ethernet/emulex/benet/be.h | 2
-rw-r--r-- drivers/net/ethernet/emulex/benet/be_ethtool.c | 19
-rw-r--r-- drivers/net/ethernet/emulex/benet/be_main.c | 10
-rw-r--r-- drivers/net/ethernet/emulex/benet/be_roce.c | 36
-rw-r--r-- drivers/net/ethernet/emulex/benet/be_roce.h | 4
-rw-r--r-- drivers/net/ethernet/ezchip/nps_enet.c | 30
-rw-r--r-- drivers/net/ethernet/freescale/Kconfig | 3
-rw-r--r-- drivers/net/ethernet/freescale/fs_enet/mac-fcc.c | 2
-rw-r--r-- drivers/net/ethernet/freescale/fsl_pq_mdio.c | 2
-rw-r--r-- drivers/net/ethernet/freescale/gianfar.c | 14
-rw-r--r-- drivers/net/ethernet/freescale/gianfar.h | 1
-rw-r--r-- drivers/net/ethernet/freescale/gianfar_ptp.c | 2
-rw-r--r-- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 49
-rw-r--r-- drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 11
-rw-r--r-- drivers/net/ethernet/icplus/Kconfig | 13
-rw-r--r-- drivers/net/ethernet/icplus/Makefile | 5
-rw-r--r-- drivers/net/ethernet/icplus/ipg.c | 2300
-rw-r--r-- drivers/net/ethernet/icplus/ipg.h | 748
-rw-r--r-- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 4
-rw-r--r-- drivers/net/ethernet/intel/i40e/i40e_adminq.c | 6
-rw-r--r-- drivers/net/ethernet/intel/i40e/i40e_main.c | 11
-rw-r--r-- drivers/net/ethernet/intel/i40evf/i40e_adminq.c | 6
-rw-r--r-- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 10
-rw-r--r-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3
-rw-r--r-- drivers/net/ethernet/marvell/mvneta.c | 33
-rw-r--r-- drivers/net/ethernet/marvell/mvpp2.c | 52
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/cmd.c | 2
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 7
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/en_main.c | 7
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 10
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/main.c | 8
-rw-r--r-- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 44
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en.h | 10
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 50
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 76
-rw-r--r-- drivers/net/ethernet/natsemi/natsemi.c | 12
-rw-r--r-- drivers/net/ethernet/nxp/lpc_eth.c | 2
-rw-r--r-- drivers/net/ethernet/qlogic/qed/qed.h | 3
-rw-r--r-- drivers/net/ethernet/qlogic/qed/qed_dev.c | 53
-rw-r--r-- drivers/net/ethernet/qlogic/qed/qed_int.c | 33
-rw-r--r-- drivers/net/ethernet/qlogic/qed/qed_int.h | 15
-rw-r--r-- drivers/net/ethernet/qlogic/qed/qed_main.c | 56
-rw-r--r-- drivers/net/ethernet/qlogic/qed/qed_reg_addr.h | 4
-rw-r--r-- drivers/net/ethernet/qlogic/qed/qed_sp.h | 8
-rw-r--r-- drivers/net/ethernet/qlogic/qed/qed_spq.c | 63
-rw-r--r-- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c | 5
-rw-r--r-- drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c | 6
-rw-r--r-- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 5
-rw-r--r-- drivers/net/ethernet/qualcomm/qca_spi.c | 5
-rw-r--r-- drivers/net/ethernet/realtek/r8169.c | 6
-rw-r--r-- drivers/net/ethernet/renesas/ravb_main.c | 19
-rw-r--r-- drivers/net/ethernet/renesas/sh_eth.c | 80
-rw-r--r-- drivers/net/ethernet/renesas/sh_eth.h | 58
-rw-r--r-- drivers/net/ethernet/sfc/ef10.c | 24
-rw-r--r-- drivers/net/ethernet/sfc/efx.c | 2
-rw-r--r-- drivers/net/ethernet/sfc/efx.h | 5
-rw-r--r-- drivers/net/ethernet/sfc/farch.c | 2
-rw-r--r-- drivers/net/ethernet/sfc/txc43128_phy.c | 2
-rw-r--r-- drivers/net/ethernet/smsc/smsc911x.c | 17
-rw-r--r-- drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c | 10
-rw-r--r-- drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c | 13
-rw-r--r-- drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 6
-rw-r--r-- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 18
-rw-r--r-- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 28
-rw-r--r-- drivers/net/ethernet/ti/cpsw-common.c | 3
-rw-r--r-- drivers/net/ethernet/ti/cpsw.c | 63
-rw-r--r-- drivers/net/ethernet/via/via-velocity.c | 24
-rw-r--r-- drivers/net/fjes/fjes_hw.c | 2
-rw-r--r-- drivers/net/geneve.c | 12
-rw-r--r-- drivers/net/hamradio/6pack.c | 12
-rw-r--r-- drivers/net/hamradio/mkiss.c | 9
-rw-r--r-- drivers/net/ipvlan/ipvlan_core.c | 14
-rw-r--r-- drivers/net/macvlan.c | 2
-rw-r--r-- drivers/net/macvtap.c | 4
-rw-r--r-- drivers/net/phy/at803x.c | 4
-rw-r--r-- drivers/net/phy/broadcom.c | 2
-rw-r--r-- drivers/net/phy/marvell.c | 16
-rw-r--r-- drivers/net/phy/mdio-mux.c | 7
-rw-r--r-- drivers/net/phy/micrel.c | 13
-rw-r--r-- drivers/net/phy/phy.c | 6
-rw-r--r-- drivers/net/phy/vitesse.c | 16
-rw-r--r-- drivers/net/ppp/pppoe.c | 14
-rw-r--r-- drivers/net/ppp/pptp.c | 6
-rw-r--r-- drivers/net/tun.c | 4
-rw-r--r-- drivers/net/usb/cdc_ether.c | 5
-rw-r--r-- drivers/net/usb/cdc_mbim.c | 28
-rw-r--r-- drivers/net/usb/cdc_ncm.c | 67
-rw-r--r-- drivers/net/usb/qmi_wwan.c | 2
-rw-r--r-- drivers/net/usb/r8152.c | 31
-rw-r--r-- drivers/net/veth.c | 6
-rw-r--r-- drivers/net/virtio_net.c | 34
-rw-r--r-- drivers/net/vmxnet3/vmxnet3_drv.c | 78
-rw-r--r-- drivers/net/vmxnet3/vmxnet3_int.h | 4
-rw-r--r-- drivers/net/vrf.c | 21
-rw-r--r-- drivers/net/vxlan.c | 75
-rw-r--r-- drivers/net/wan/hdlc_fr.c | 10
-rw-r--r-- drivers/net/wan/x25_asy.c | 6
-rw-r--r-- drivers/net/wireless/ath/ath10k/core.c | 49
-rw-r--r-- drivers/net/wireless/ath/ath10k/core.h | 1
-rw-r--r-- drivers/net/wireless/ath/ath10k/hw.h | 17
-rw-r--r-- drivers/net/wireless/ath/ath10k/mac.c | 2
-rw-r--r-- drivers/net/wireless/ath/ath10k/pci.c | 53
-rw-r--r-- drivers/net/wireless/iwlwifi/iwl-7000.c | 47
-rw-r--r-- drivers/net/wireless/iwlwifi/iwl-8000.c | 2
-rw-r--r-- drivers/net/wireless/iwlwifi/mvm/d3.c | 8
-rw-r--r-- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 11
-rw-r--r-- drivers/net/wireless/iwlwifi/mvm/sta.c | 97
-rw-r--r-- drivers/net/wireless/iwlwifi/mvm/sta.h | 4
-rw-r--r-- drivers/net/wireless/iwlwifi/pcie/drv.c | 19
-rw-r--r-- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 2
-rw-r--r-- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c | 2
-rw-r--r-- drivers/net/xen-netback/netback.c | 34
-rw-r--r-- drivers/nvme/host/Makefile | 3
-rw-r--r-- drivers/nvme/host/lightnvm.c | 187
-rw-r--r-- drivers/nvme/host/nvme.h | 14
-rw-r--r-- drivers/nvme/host/pci.c | 71
-rw-r--r-- drivers/of/address.c | 5
-rw-r--r-- drivers/of/fdt.c | 7
-rw-r--r-- drivers/of/irq.c | 3
-rw-r--r-- drivers/of/of_reserved_mem.c | 8
-rw-r--r-- drivers/parisc/iommu-helpers.h | 15
-rw-r--r-- drivers/pci/host/Kconfig | 1
-rw-r--r-- drivers/pci/host/pcie-altera.c | 23
-rw-r--r-- drivers/pci/host/pcie-designware.c | 1
-rw-r--r-- drivers/pci/host/pcie-hisi.c | 8
-rw-r--r-- drivers/pci/msi.c | 8
-rw-r--r-- drivers/pci/pci-acpi.c | 42
-rw-r--r-- drivers/pci/pci-driver.c | 16
-rw-r--r-- drivers/pci/pci-sysfs.c | 5
-rw-r--r-- drivers/pci/pci.h | 2
-rw-r--r-- drivers/pci/probe.c | 6
-rw-r--r-- drivers/phy/Kconfig | 1
-rw-r--r-- drivers/phy/phy-bcm-cygnus-pcie.c | 16
-rw-r--r-- drivers/phy/phy-berlin-sata.c | 20
-rw-r--r-- drivers/phy/phy-brcmstb-sata.c | 17
-rw-r--r-- drivers/phy/phy-core.c | 21
-rw-r--r-- drivers/phy/phy-miphy28lp.c | 16
-rw-r--r-- drivers/phy/phy-miphy365x.c | 16
-rw-r--r-- drivers/phy/phy-mt65xx-usb3.c | 20
-rw-r--r-- drivers/phy/phy-rockchip-usb.c | 17
-rw-r--r-- drivers/pinctrl/Kconfig | 4
-rw-r--r-- drivers/pinctrl/bcm/pinctrl-bcm2835.c | 13
-rw-r--r-- drivers/pinctrl/freescale/pinctrl-imx1-core.c | 8
-rw-r--r-- drivers/pinctrl/freescale/pinctrl-vf610.c | 2
-rw-r--r-- drivers/pinctrl/intel/pinctrl-broxton.c | 1
-rw-r--r-- drivers/pinctrl/intel/pinctrl-intel.c | 41
-rw-r--r-- drivers/pinctrl/intel/pinctrl-intel.h | 3
-rw-r--r-- drivers/pinctrl/intel/pinctrl-sunrisepoint.c | 1
-rw-r--r-- drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 11
-rw-r--r-- drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c | 2
-rw-r--r-- drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c | 2
-rw-r--r-- drivers/pinctrl/sh-pfc/pfc-sh7734.c | 6
-rw-r--r-- drivers/powercap/intel_rapl.c | 7
-rw-r--r-- drivers/remoteproc/remoteproc_core.c | 2
-rw-r--r-- drivers/remoteproc/remoteproc_debugfs.c | 2
-rw-r--r-- drivers/rtc/rtc-da9063.c | 19
-rw-r--r-- drivers/rtc/rtc-ds1307.c | 44
-rw-r--r-- drivers/rtc/rtc-rk808.c | 48
-rw-r--r-- drivers/s390/cio/chsc.c | 37
-rw-r--r-- drivers/s390/cio/chsc.h | 15
-rw-r--r-- drivers/s390/cio/cio.c | 14
-rw-r--r-- drivers/s390/cio/css.c | 5
-rw-r--r-- drivers/s390/crypto/Makefile | 7
-rw-r--r-- drivers/s390/crypto/ap_bus.c | 10
-rw-r--r-- drivers/s390/crypto/zcrypt_api.c | 10
-rw-r--r-- drivers/s390/crypto/zcrypt_api.h | 1
-rw-r--r-- drivers/s390/crypto/zcrypt_msgtype50.c | 1
-rw-r--r-- drivers/s390/crypto/zcrypt_msgtype6.c | 3
-rw-r--r-- drivers/s390/virtio/virtio_ccw.c | 62
-rw-r--r-- drivers/scsi/Kconfig | 2
-rw-r--r-- drivers/scsi/advansys.c | 2
-rw-r--r-- drivers/scsi/hosts.c | 11
-rw-r--r-- drivers/scsi/hpsa.c | 2
-rw-r--r-- drivers/scsi/mpt3sas/Kconfig | 9
-rw-r--r-- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 3
-rw-r--r-- drivers/scsi/mvsas/mv_init.c | 4
-rw-r--r-- drivers/scsi/qla2xxx/qla_nx.c | 3
-rw-r--r-- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 2
-rw-r--r-- drivers/scsi/scsi_debug.c | 9
-rw-r--r-- drivers/scsi/scsi_pm.c | 20
-rw-r--r-- drivers/scsi/scsi_scan.c | 9
-rw-r--r-- drivers/scsi/scsi_sysfs.c | 22
-rw-r--r-- drivers/scsi/sd.c | 72
-rw-r--r-- drivers/scsi/sd.h | 1
-rw-r--r-- drivers/scsi/ses.c | 30
-rw-r--r-- drivers/scsi/st.c | 5
-rw-r--r-- drivers/sh/pm_runtime.c | 2
-rw-r--r-- drivers/soc/mediatek/Kconfig | 1
-rw-r--r-- drivers/soc/ti/knav_qmss_queue.c | 8
-rw-r--r-- drivers/spi/spi-bcm63xx.c | 4
-rw-r--r-- drivers/spi/spi-fsl-dspi.c | 12
-rw-r--r-- drivers/spi/spi-mt65xx.c | 26
-rw-r--r-- drivers/spi/spi-pl022.c | 28
-rw-r--r-- drivers/spi/spi.c | 4
-rw-r--r-- drivers/spi/spidev.c | 2
-rw-r--r-- drivers/staging/android/ion/ion_chunk_heap.c | 4
-rw-r--r-- drivers/staging/iio/Kconfig | 3
-rw-r--r-- drivers/staging/iio/adc/lpc32xx_adc.c | 4
-rw-r--r-- drivers/staging/iio/iio_simple_dummy_events.c | 2
-rw-r--r-- drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h | 1
-rw-r--r-- drivers/staging/lustre/lustre/libcfs/module.c | 17
-rw-r--r-- drivers/staging/lustre/lustre/llite/symlink.c | 24
-rw-r--r-- drivers/staging/lustre/lustre/llite/xattr.c | 4
-rw-r--r-- drivers/staging/lustre/lustre/obdecho/echo_client.c | 20
-rw-r--r-- drivers/staging/wilc1000/coreconfigurator.c | 48
-rw-r--r-- drivers/target/iscsi/iscsi_target.c | 13
-rw-r--r-- drivers/target/iscsi/iscsi_target_nego.c | 1
-rw-r--r-- drivers/target/iscsi/iscsi_target_parameters.c | 10
-rw-r--r-- drivers/target/target_core_sbc.c | 17
-rw-r--r-- drivers/target/target_core_stat.c | 2
-rw-r--r-- drivers/target/target_core_tmr.c | 7
-rw-r--r-- drivers/target/target_core_transport.c | 26
-rw-r--r-- drivers/target/target_core_user.c | 4
-rw-r--r-- drivers/thermal/Kconfig | 2
-rw-r--r-- drivers/thermal/imx_thermal.c | 56
-rw-r--r-- drivers/thermal/of-thermal.c | 2
-rw-r--r-- drivers/thermal/power_allocator.c | 24
-rw-r--r-- drivers/thermal/rcar_thermal.c | 49
-rw-r--r-- drivers/thermal/rockchip_thermal.c | 328
-rw-r--r-- drivers/tty/n_tty.c | 24
-rw-r--r-- drivers/tty/serial/8250/8250_fsl.c | 1
-rw-r--r-- drivers/tty/serial/8250/8250_uniphier.c | 8
-rw-r--r-- drivers/tty/serial/8250/Kconfig | 1
-rw-r--r-- drivers/tty/serial/Kconfig | 2
-rw-r--r-- drivers/tty/serial/bcm63xx_uart.c | 2
-rw-r--r-- drivers/tty/serial/earlycon.c | 2
-rw-r--r-- drivers/tty/serial/etraxfs-uart.c | 2
-rw-r--r-- drivers/tty/serial/sh-sci.c | 2
-rw-r--r-- drivers/tty/serial/sunhv.c | 12
-rw-r--r-- drivers/tty/sysrq.c | 6
-rw-r--r-- drivers/tty/tty_audit.c | 2
-rw-r--r-- drivers/tty/tty_buffer.c | 2
-rw-r--r-- drivers/tty/tty_io.c | 4
-rw-r--r-- drivers/tty/tty_ioctl.c | 4
-rw-r--r-- drivers/tty/tty_ldisc.c | 2
-rw-r--r-- drivers/usb/chipidea/ci_hdrc_imx.c | 142
-rw-r--r-- drivers/usb/chipidea/debug.c | 2
-rw-r--r-- drivers/usb/chipidea/udc.c | 17
-rw-r--r-- drivers/usb/chipidea/usbmisc_imx.c | 10
-rw-r--r-- drivers/usb/class/cdc-acm.c | 5
-rw-r--r-- drivers/usb/class/usblp.c | 2
-rw-r--r-- drivers/usb/core/Kconfig | 3
-rw-r--r-- drivers/usb/core/config.c | 3
-rw-r--r-- drivers/usb/core/hub.c | 44
-rw-r--r-- drivers/usb/core/port.c | 4
-rw-r--r-- drivers/usb/core/quirks.c | 9
-rw-r--r-- drivers/usb/dwc2/hcd.c | 9
-rw-r--r-- drivers/usb/dwc2/platform.c | 84
-rw-r--r-- drivers/usb/dwc3/dwc3-pci.c | 4
-rw-r--r-- drivers/usb/dwc3/gadget.c | 25
-rw-r--r-- drivers/usb/gadget/function/f_fs.c | 6
-rw-r--r-- drivers/usb/gadget/function/f_loopback.c | 2
-rw-r--r-- drivers/usb/gadget/function/f_midi.c | 3
-rw-r--r-- drivers/usb/gadget/function/uvc_configfs.c | 2
-rw-r--r-- drivers/usb/gadget/udc/atmel_usba_udc.c | 2
-rw-r--r-- drivers/usb/gadget/udc/pxa27x_udc.c | 3
-rw-r--r-- drivers/usb/host/ohci-at91.c | 11
-rw-r--r-- drivers/usb/host/whci/qset.c | 4
-rw-r--r-- drivers/usb/host/xhci-hub.c | 62
-rw-r--r-- drivers/usb/host/xhci-pci.c | 8
-rw-r--r-- drivers/usb/host/xhci-ring.c | 35
-rw-r--r-- drivers/usb/host/xhci.c | 18
-rw-r--r-- drivers/usb/musb/Kconfig | 2
-rw-r--r-- drivers/usb/musb/musb_core.c | 20
-rw-r--r-- drivers/usb/musb/musb_host.c | 22
-rw-r--r-- drivers/usb/phy/Kconfig | 4
-rw-r--r-- drivers/usb/phy/phy-msm-usb.c | 6
-rw-r--r-- drivers/usb/phy/phy-mxs-usb.c | 12
-rw-r--r-- drivers/usb/phy/phy-omap-otg.c | 2
-rw-r--r-- drivers/usb/renesas_usbhs/mod_gadget.c | 11
-rw-r--r-- drivers/usb/serial/cp210x.c | 1
-rw-r--r-- drivers/usb/serial/ipaq.c | 3
-rw-r--r-- drivers/usb/serial/option.c | 11
-rw-r--r-- drivers/usb/serial/qcserial.c | 94
-rw-r--r-- drivers/usb/serial/ti_usb_3410_5052.c | 2
-rw-r--r-- drivers/usb/serial/ti_usb_3410_5052.h | 4
-rw-r--r-- drivers/usb/serial/usb-serial-simple.c | 1
-rw-r--r-- drivers/usb/storage/uas.c | 4
-rw-r--r-- drivers/usb/storage/unusual_devs.h | 2
-rw-r--r-- drivers/usb/storage/unusual_uas.h | 2
-rw-r--r-- drivers/vfio/Kconfig | 15
-rw-r--r-- drivers/vfio/pci/vfio_pci.c | 10
-rw-r--r-- drivers/vfio/platform/vfio_platform.c | 1
-rw-r--r-- drivers/vfio/platform/vfio_platform_common.c | 5
-rw-r--r-- drivers/vfio/vfio.c | 188
-rw-r--r-- drivers/vhost/vhost.c | 8
-rw-r--r-- drivers/video/fbdev/fsl-diu-fb.c | 13
-rw-r--r-- drivers/video/fbdev/omap2/dss/venc.c | 12
-rw-r--r-- drivers/virtio/virtio.c | 1
-rw-r--r-- drivers/virtio/virtio_ring.c | 48
-rw-r--r-- drivers/watchdog/Kconfig | 2
-rw-r--r-- drivers/watchdog/mtk_wdt.c | 1
-rw-r--r-- drivers/watchdog/omap_wdt.c | 2
-rw-r--r-- drivers/watchdog/pnx4008_wdt.c | 8
-rw-r--r-- drivers/watchdog/tegra_wdt.c | 4
-rw-r--r-- drivers/watchdog/w83977f_wdt.c | 2
-rw-r--r-- drivers/xen/events/events_base.c | 5
-rw-r--r-- drivers/xen/events/events_fifo.c | 23
-rw-r--r-- drivers/xen/evtchn.c | 123
-rw-r--r-- drivers/xen/gntdev.c | 2
-rw-r--r-- drivers/xen/xen-pciback/pciback.h | 1
-rw-r--r-- drivers/xen/xen-pciback/pciback_ops.c | 75
-rw-r--r-- drivers/xen/xen-pciback/xenbus.c | 4
-rw-r--r-- drivers/xen/xen-scsiback.c | 2
-rw-r--r-- fs/9p/acl.c | 24
-rw-r--r-- fs/9p/vfs_inode.c | 28
-rw-r--r-- fs/9p/vfs_inode_dotl.c | 21
-rw-r--r-- fs/9p/xattr.c | 4
-rw-r--r-- fs/Kconfig | 6
-rw-r--r-- fs/affs/inode.c | 1
-rw-r--r-- fs/affs/namei.c | 1
-rw-r--r-- fs/affs/symlink.c | 9
-rw-r--r-- fs/afs/inode.c | 1
-rw-r--r-- fs/autofs4/symlink.c | 14
-rw-r--r-- fs/befs/linuxvfs.c | 40
-rw-r--r-- fs/block_dev.c | 27
-rw-r--r-- fs/btrfs/acl.c | 8
-rw-r--r-- fs/btrfs/backref.c | 2
-rw-r--r-- fs/btrfs/ctree.h | 4
-rw-r--r-- fs/btrfs/disk-io.c | 2
-rw-r--r-- fs/btrfs/extent-tree.c | 133
-rw-r--r-- fs/btrfs/file.c | 28
-rw-r--r-- fs/btrfs/free-space-cache.c | 10
-rw-r--r-- fs/btrfs/inode.c | 45
-rw-r--r-- fs/btrfs/qgroup.c | 5
-rw-r--r-- fs/btrfs/scrub.c | 62
-rw-r--r-- fs/btrfs/tests/free-space-tests.c | 4
-rw-r--r-- fs/btrfs/transaction.c | 33
-rw-r--r-- fs/btrfs/transaction.h | 6
-rw-r--r-- fs/btrfs/volumes.c | 16
-rw-r--r-- fs/btrfs/volumes.h | 2
-rw-r--r-- fs/btrfs/xattr.c | 166
-rw-r--r-- fs/btrfs/xattr.h | 2
-rw-r--r-- fs/cachefiles/rdwr.c | 2
-rw-r--r-- fs/ceph/acl.c | 16
-rw-r--r-- fs/ceph/inode.c | 2
-rw-r--r-- fs/cifs/cifsfs.c | 3
-rw-r--r-- fs/cifs/cifsfs.h | 5
-rw-r--r-- fs/cifs/inode.c | 6
-rw-r--r-- fs/cifs/link.c | 10
-rw-r--r-- fs/cifs/xattr.c | 16
-rw-r--r-- fs/coda/cnode.c | 5
-rw-r--r-- fs/coda/symlink.c | 4
-rw-r--r-- fs/compat_ioctl.c | 253
-rw-r--r-- fs/configfs/dir.c | 110
-rw-r--r-- fs/configfs/symlink.c | 22
-rw-r--r-- fs/cramfs/inode.c | 1
-rw-r--r-- fs/dax.c | 4
-rw-r--r-- fs/dcache.c | 2
-rw-r--r-- fs/direct-io.c | 11
-rw-r--r-- fs/dlm/lowcomms.c | 4
-rw-r--r-- fs/ecryptfs/inode.c | 17
-rw-r--r-- fs/efs/inode.c | 1
-rw-r--r-- fs/efs/symlink.c | 4
-rw-r--r-- fs/exofs/inode.c | 6
-rw-r--r-- fs/exofs/namei.c | 1
-rw-r--r-- fs/ext2/inode.c | 1
-rw-r--r-- fs/ext2/namei.c | 1
-rw-r--r-- fs/ext2/super.c | 2
-rw-r--r-- fs/ext2/symlink.c | 5
-rw-r--r-- fs/ext2/xattr.c | 15
-rw-r--r-- fs/ext2/xattr_security.c | 21
-rw-r--r-- fs/ext2/xattr_trusted.c | 23
-rw-r--r-- fs/ext2/xattr_user.c | 23
-rw-r--r-- fs/ext4/crypto.c | 2
-rw-r--r-- fs/ext4/ext4.h | 51
-rw-r--r-- fs/ext4/inode.c | 1
-rw-r--r-- fs/ext4/namei.c | 1
-rw-r--r-- fs/ext4/super.c | 6
-rw-r--r-- fs/ext4/symlink.c | 31
-rw-r--r-- fs/ext4/sysfs.c | 2
-rw-r--r-- fs/ext4/xattr.c | 17
-rw-r--r-- fs/ext4/xattr_security.c | 22
-rw-r--r-- fs/ext4/xattr_trusted.c | 23
-rw-r--r-- fs/ext4/xattr_user.c | 23
-rw-r--r-- fs/f2fs/inode.c | 1
-rw-r--r-- fs/f2fs/namei.c | 31
-rw-r--r-- fs/f2fs/xattr.c | 92
-rw-r--r-- fs/f2fs/xattr.h | 2
-rw-r--r-- fs/fat/dir.c | 16
-rw-r--r-- fs/freevxfs/vxfs_inode.c | 1
-rw-r--r-- fs/fuse/cuse.c | 2
-rw-r--r-- fs/fuse/dir.c | 17
-rw-r--r-- fs/fuse/file.c | 2
-rw-r--r-- fs/gfs2/acl.c | 4
-rw-r--r-- fs/gfs2/acl.h | 2
-rw-r--r-- fs/gfs2/inode.c | 19
-rw-r--r-- fs/gfs2/xattr.c | 50
-rw-r--r-- fs/gfs2/xattr.h | 1
-rw-r--r-- fs/hfsplus/inode.c | 2
-rw-r--r-- fs/hfsplus/posix_acl.c | 8
-rw-r--r-- fs/hfsplus/xattr.c | 12
-rw-r--r-- fs/hostfs/hostfs_kern.c | 22
-rw-r--r-- fs/hpfs/inode.c | 1
-rw-r--r-- fs/hpfs/namei.c | 5
-rw-r--r-- fs/hugetlbfs/inode.c | 66
-rw-r--r-- fs/inode.c | 6
-rw-r--r-- fs/internal.h | 7
-rw-r--r-- fs/ioctl.c | 4
-rw-r--r-- fs/isofs/inode.c | 1
-rw-r--r-- fs/isofs/rock.c | 4
-rw-r--r-- fs/jbd2/transaction.c | 12
-rw-r--r-- fs/jffs2/security.c | 22
-rw-r--r-- fs/jffs2/symlink.c | 2
-rw-r--r-- fs/jffs2/xattr.c | 26
-rw-r--r-- fs/jffs2/xattr_trusted.c | 21
-rw-r--r-- fs/jffs2/xattr_user.c | 20
-rw-r--r-- fs/jfs/acl.c | 8
-rw-r--r-- fs/jfs/inode.c | 1
-rw-r--r-- fs/jfs/namei.c | 1
-rw-r--r-- fs/jfs/symlink.c | 5
-rw-r--r-- fs/kernfs/inode.c | 4
-rw-r--r-- fs/kernfs/symlink.c | 24
-rw-r--r-- fs/libfs.c | 22
-rw-r--r-- fs/logfs/dir.c | 9
-rw-r--r-- fs/logfs/inode.c | 3
-rw-r--r-- fs/logfs/logfs.h | 1
-rw-r--r-- fs/minix/inode.c | 4
-rw-r--r-- fs/namei.c | 127
-rw-r--r-- fs/ncpfs/inode.c | 4
-rw-r--r-- fs/ncpfs/ioctl.c | 2
-rw-r--r-- fs/nfs/inode.c | 43
-rw-r--r-- fs/nfs/internal.h | 2
-rw-r--r-- fs/nfs/nfs3acl.c | 4
-rw-r--r-- fs/nfs/nfs42proc.c | 3
-rw-r--r-- fs/nfs/nfs4client.c | 2
-rw-r--r-- fs/nfs/nfs4file.c | 59
-rw-r--r-- fs/nfs/nfs4proc.c | 77
-rw-r--r-- fs/nfs/nfs4xdr.c | 1
-rw-r--r-- fs/nfs/objlayout/objio_osd.c | 5
-rw-r--r-- fs/nfs/pagelist.c | 2
-rw-r--r-- fs/nfs/pnfs.c | 60
-rw-r--r-- fs/nfs/symlink.c | 39
-rw-r--r-- fs/nfsd/nfs4layouts.c | 2
-rw-r--r-- fs/nilfs2/inode.c | 1
-rw-r--r-- fs/nilfs2/namei.c | 4
-rw-r--r-- fs/ocfs2/dlm/dlmmaster.c | 2
-rw-r--r-- fs/ocfs2/inode.c | 1
-rw-r--r-- fs/ocfs2/locks.c | 5
-rw-r--r-- fs/ocfs2/namei.c | 3
-rw-r--r-- fs/ocfs2/resize.c | 15
-rw-r--r-- fs/ocfs2/symlink.c | 3
-rw-r--r-- fs/ocfs2/xattr.c | 168
-rw-r--r-- fs/overlayfs/copy_up.c | 23
-rw-r--r-- fs/overlayfs/inode.c | 72
-rw-r--r-- fs/overlayfs/overlayfs.h | 3
-rw-r--r-- fs/posix_acl.c | 25
-rw-r--r-- fs/proc/base.c | 25
-rw-r--r-- fs/proc/inode.c | 21
-rw-r--r-- fs/proc/namespaces.c | 10
-rw-r--r-- fs/proc/self.c | 18
-rw-r--r-- fs/proc/thread_self.c | 19
-rw-r--r-- fs/qnx4/inode.c | 1
-rw-r--r-- fs/qnx6/inode.c | 1
-rw-r--r-- fs/ramfs/inode.c | 1
-rw-r--r-- fs/reiserfs/inode.c | 1
-rw-r--r-- fs/reiserfs/namei.c | 4
-rw-r--r-- fs/reiserfs/xattr.c | 16
-rw-r--r-- fs/reiserfs/xattr_acl.c | 8
-rw-r--r-- fs/reiserfs/xattr_security.c | 16
-rw-r--r-- fs/reiserfs/xattr_trusted.c | 15
-rw-r--r-- fs/reiserfs/xattr_user.c | 14
-rw-r--r-- fs/romfs/super.c | 1
-rw-r--r-- fs/splice.c | 8
-rw-r--r-- fs/squashfs/inode.c | 2
-rw-r--r-- fs/squashfs/symlink.c | 3
-rw-r--r-- fs/squashfs/xattr.c | 38
-rw-r--r-- fs/sysv/inode.c | 15
-rw-r--r-- fs/ubifs/file.c | 2
-rw-r--r-- fs/udf/inode.c | 3
-rw-r--r-- fs/udf/namei.c | 8
-rw-r--r-- fs/udf/symlink.c | 4
-rw-r--r-- fs/udf/udfdecl.h | 1
-rw-r--r-- fs/ufs/Makefile | 2
-rw-r--r-- fs/ufs/inode.c | 5
-rw-r--r-- fs/ufs/namei.c | 5
-rw-r--r-- fs/ufs/symlink.c | 42
-rw-r--r-- fs/ufs/ufs.h | 4
-rw-r--r-- fs/xattr.c | 165
-rw-r--r-- fs/xfs/xfs_acl.c | 23
-rw-r--r-- fs/xfs/xfs_acl.h | 4
-rw-r--r-- fs/xfs/xfs_iops.c | 14
-rw-r--r-- fs/xfs/xfs_xattr.c | 143
-rw-r--r-- include/asm-generic/barrier.h | 2
-rw-r--r-- include/asm-generic/pgtable.h | 10
-rw-r--r-- include/asm-generic/qspinlock.h | 9
-rw-r--r-- include/asm-generic/tlb.h | 2
-rw-r--r-- include/drm/drmP.h | 10
-rw-r--r-- include/drm/drm_atomic.h | 3
-rw-r--r-- include/kvm/arm_vgic.h | 2
-rw-r--r-- include/linux/acpi.h | 4
-rw-r--r-- include/linux/bitops.h | 2
-rw-r--r-- include/linux/blkdev.h | 6
-rw-r--r-- include/linux/bootmem.h | 4
-rw-r--r-- include/linux/bpf.h | 5
-rw-r--r-- include/linux/cgroup-defs.h | 13
-rw-r--r-- include/linux/cgroup.h | 47
-rw-r--r-- include/linux/clocksource.h | 16
-rw-r--r-- include/linux/compiler.h | 17
-rw-r--r-- include/linux/configfs.h | 10
-rw-r--r-- include/linux/context_tracking.h | 4
-rw-r--r-- include/linux/context_tracking_state.h | 4
-rw-r--r-- include/linux/cpufreq.h | 1
-rw-r--r-- include/linux/delayed_call.h | 34
-rw-r--r-- include/linux/dns_resolver.h | 2
-rw-r--r-- include/linux/enclosure.h | 4
-rw-r--r-- include/linux/filter.h | 19
-rw-r--r-- include/linux/fs.h | 18
-rw-r--r-- include/linux/ftrace.h | 1
-rw-r--r-- include/linux/gfp.h | 2
-rw-r--r-- include/linux/init_task.h | 2
-rw-r--r-- include/linux/interrupt.h | 1
-rw-r--r-- include/linux/ipv6.h | 2
-rw-r--r-- include/linux/irqchip/arm-gic-v3.h | 1
-rw-r--r-- include/linux/irqchip/arm-gic.h | 13
-rw-r--r-- include/linux/irqdesc.h | 6
-rw-r--r-- include/linux/irqdomain.h | 13
-rw-r--r-- include/linux/jump_label.h | 2
-rw-r--r-- include/linux/kernel.h | 28
-rw-r--r-- include/linux/kexec.h | 2
-rw-r--r-- include/linux/kmemleak.h | 2
-rw-r--r-- include/linux/kref.h | 33
-rw-r--r-- include/linux/kvm_host.h | 11
-rw-r--r-- include/linux/libata.h | 1
-rw-r--r-- include/linux/lightnvm.h | 197
-rw-r--r-- include/linux/list.h | 14
-rw-r--r-- include/linux/list_bl.h | 2
-rw-r--r-- include/linux/list_nulls.h | 2
-rw-r--r-- include/linux/lockdep.h | 2
-rw-r--r-- include/linux/marvell_phy.h | 1
-rw-r--r-- include/linux/mlx4/device.h | 11
-rw-r--r-- include/linux/mlx5/mlx5_ifc.h | 24
-rw-r--r-- include/linux/mmdebug.h | 1
-rw-r--r-- include/linux/msi.h | 18
-rw-r--r-- include/linux/mtd/spi-nor.h | 2
-rw-r--r-- include/linux/net.h | 13
-rw-r--r-- include/linux/netdevice.h | 35
-rw-r--r-- include/linux/netfilter/ipset/ip_set.h | 2
-rw-r--r-- include/linux/netfilter/nfnetlink.h | 2
-rw-r--r--include/linux/netfilter_ingress.h13
-rw-r--r--include/linux/nfs_fs.h1
-rw-r--r--include/linux/nfs_xdr.h1
-rw-r--r--include/linux/of_dma.h2
-rw-r--r--include/linux/of_irq.h19
-rw-r--r--include/linux/pci.h19
-rw-r--r--include/linux/perf_event.h6
-rw-r--r--include/linux/platform_data/edma.h2
-rw-r--r--include/linux/platform_data/irq-renesas-intc-irqpin.h29
-rw-r--r--include/linux/posix_acl_xattr.h6
-rw-r--r--include/linux/proportions.h2
-rw-r--r--include/linux/qed/common_hsi.h2
-rw-r--r--include/linux/qed/qed_chain.h3
-rw-r--r--include/linux/rculist.h105
-rw-r--r--include/linux/rcupdate.h21
-rw-r--r--include/linux/rcutiny.h8
-rw-r--r--include/linux/rcutree.h4
-rw-r--r--include/linux/rhashtable.h18
-rw-r--r--include/linux/sched.h40
-rw-r--r--include/linux/sched_clock.h12
-rw-r--r--include/linux/scpi_protocol.h2
-rw-r--r--include/linux/signal.h1
-rw-r--r--include/linux/slab.h45
-rw-r--r--include/linux/stop_machine.h13
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--include/linux/thermal.h3
-rw-r--r--include/linux/time.h26
-rw-r--r--include/linux/tracepoint-defs.h27
-rw-r--r--include/linux/tracepoint.h20
-rw-r--r--include/linux/tty.h6
-rw-r--r--include/linux/types.h2
-rw-r--r--include/linux/uprobes.h2
-rw-r--r--include/linux/usb/cdc_ncm.h1
-rw-r--r--include/linux/usb/quirks.h3
-rw-r--r--include/linux/vfio.h3
-rw-r--r--include/linux/vmstat.h6
-rw-r--r--include/linux/vtime.h25
-rw-r--r--include/linux/wait.h40
-rw-r--r--include/linux/workqueue.h6
-rw-r--r--include/linux/xattr.h20
-rw-r--r--include/net/af_unix.h1
-rw-r--r--include/net/dst.h33
-rw-r--r--include/net/inet_sock.h27
-rw-r--r--include/net/inetpeer.h1
-rw-r--r--include/net/ip6_fib.h3
-rw-r--r--include/net/ip6_route.h17
-rw-r--r--include/net/ip6_tunnel.h3
-rw-r--r--include/net/ip_tunnels.h3
-rw-r--r--include/net/ipv6.h22
-rw-r--r--include/net/l3mdev.h16
-rw-r--r--include/net/mac80211.h6
-rw-r--r--include/net/ndisc.h3
-rw-r--r--include/net/netfilter/nf_tables.h16
-rw-r--r--include/net/route.h7
-rw-r--r--include/net/sch_generic.h3
-rw-r--r--include/net/sctp/structs.h17
-rw-r--r--include/net/sock.h64
-rw-r--r--include/net/switchdev.h2
-rw-r--r--include/net/vxlan.h2
-rw-r--r--include/net/xfrm.h25
-rw-r--r--include/rdma/ib_mad.h2
-rw-r--r--include/rdma/ib_verbs.h1
-rw-r--r--include/scsi/scsi_host.h3
-rw-r--r--include/sound/hda_register.h3
-rw-r--r--include/sound/soc-dapm.h1
-rw-r--r--include/sound/soc.h2
-rw-r--r--include/target/target_core_base.h2
-rw-r--r--include/uapi/linux/Kbuild1
-rw-r--r--include/uapi/linux/nfs.h11
-rw-r--r--include/uapi/linux/openvswitch.h2
-rw-r--r--include/uapi/linux/perf_event.h6
-rw-r--r--include/uapi/linux/vfio.h7
-rw-r--r--include/video/imx-ipu-v3.h1
-rw-r--r--include/xen/interface/io/ring.h14
-rw-r--r--init/Kconfig7
-rw-r--r--init/main.c2
-rw-r--r--kernel/bpf/arraymap.c10
-rw-r--r--kernel/bpf/hashtab.c34
-rw-r--r--kernel/bpf/inode.c6
-rw-r--r--kernel/bpf/syscall.c40
-rw-r--r--kernel/bpf/verifier.c3
-rw-r--r--kernel/cgroup.c99
-rw-r--r--kernel/cgroup_freezer.c23
-rw-r--r--kernel/cgroup_pids.c77
-rw-r--r--kernel/context_tracking.c4
-rw-r--r--kernel/cpuset.c33
-rw-r--r--kernel/events/callchain.c2
-rw-r--r--kernel/events/core.c395
-rw-r--r--kernel/events/ring_buffer.c2
-rw-r--r--kernel/events/uprobes.c2
-rw-r--r--kernel/fork.c14
-rw-r--r--kernel/futex.c83
-rw-r--r--kernel/irq/chip.c9
-rw-r--r--kernel/irq/irqdesc.c19
-rw-r--r--kernel/irq/irqdomain.c12
-rw-r--r--kernel/irq/manage.c31
-rw-r--r--kernel/irq/msi.c58
-rw-r--r--kernel/irq_work.c2
-rw-r--r--kernel/jump_label.c2
-rw-r--r--kernel/kexec_core.c30
-rw-r--r--kernel/ksysfs.c26
-rw-r--r--kernel/livepatch/core.c6
-rw-r--r--kernel/locking/lockdep.c2
-rw-r--r--kernel/locking/lockdep_proc.c2
-rw-r--r--kernel/locking/osq_lock.c8
-rw-r--r--kernel/locking/qspinlock.c82
-rw-r--r--kernel/locking/qspinlock_paravirt.h252
-rw-r--r--kernel/locking/qspinlock_stat.h300
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/panic.c38
-rw-r--r--kernel/pid.c4
-rw-r--r--kernel/rcu/rcutorture.c24
-rw-r--r--kernel/rcu/srcu.c2
-rw-r--r--kernel/rcu/tree.c313
-rw-r--r--kernel/rcu/tree.h61
-rw-r--r--kernel/rcu/tree_plugin.h66
-rw-r--r--kernel/rcu/tree_trace.c39
-rw-r--r--kernel/rcu/update.c22
-rw-r--r--kernel/sched/auto_group.c2
-rw-r--r--kernel/sched/clock.c4
-rw-r--r--kernel/sched/core.c225
-rw-r--r--kernel/sched/cputime.c75
-rw-r--r--kernel/sched/deadline.c59
-rw-r--r--kernel/sched/fair.c316
-rw-r--r--kernel/sched/idle_task.c1
-rw-r--r--kernel/sched/rt.c2
-rw-r--r--kernel/sched/sched.h73
-rw-r--r--kernel/sched/wait.c28
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/stop_machine.c88
-rw-r--r--kernel/time/alarmtimer.c17
-rw-r--r--kernel/time/clocksource.c4
-rw-r--r--kernel/time/ntp.c44
-rw-r--r--kernel/time/ntp_internal.h2
-rw-r--r--kernel/time/posix-clock.c4
-rw-r--r--kernel/time/tick-sched.c36
-rw-r--r--kernel/time/timekeeping.c49
-rw-r--r--kernel/time/timekeeping_internal.h8
-rw-r--r--kernel/trace/ring_buffer.c17
-rw-r--r--kernel/trace/trace_event_perf.c2
-rw-r--r--kernel/trace/trace_events.c16
-rw-r--r--kernel/trace/trace_printk.c1
-rw-r--r--kernel/watchdog.c20
-rw-r--r--kernel/workqueue.c220
-rw-r--r--lib/Kconfig.debug11
-rw-r--r--lib/atomic64_test.c124
-rw-r--r--lib/btree.c2
-rw-r--r--lib/dma-debug.c4
-rw-r--r--lib/list_debug.c2
-rw-r--r--lib/proportions.c2
-rw-r--r--lib/rhashtable.c70
-rw-r--r--mm/backing-dev.c19
-rw-r--r--mm/bootmem.c1
-rw-r--r--mm/huge_memory.c4
-rw-r--r--mm/hugetlb.c27
-rw-r--r--mm/kasan/kasan.c2
-rw-r--r--mm/memcontrol.c109
-rw-r--r--mm/memory.c8
-rw-r--r--mm/memory_hotplug.c31
-rw-r--r--mm/mremap.c4
-rw-r--r--mm/nobootmem.c1
-rw-r--r--mm/oom_kill.c2
-rw-r--r--mm/page-writeback.c6
-rw-r--r--mm/page_alloc.c3
-rw-r--r--mm/shmem.c215
-rw-r--r--mm/slab.c2
-rw-r--r--mm/slab.h2
-rw-r--r--mm/slab_common.c6
-rw-r--r--mm/slob.c2
-rw-r--r--mm/slub.c304
-rw-r--r--mm/vmalloc.c5
-rw-r--r--mm/vmstat.c18
-rw-r--r--mm/zswap.c6
-rw-r--r--net/8021q/vlan_core.c4
-rw-r--r--net/ax25/af_ax25.c3
-rw-r--r--net/batman-adv/distributed-arp-table.c5
-rw-r--r--net/batman-adv/routing.c19
-rw-r--r--net/batman-adv/translation-table.c16
-rw-r--r--net/bluetooth/af_bluetooth.c6
-rw-r--r--net/bluetooth/sco.c3
-rw-r--r--net/bluetooth/smp.c7
-rw-r--r--net/bridge/br_stp.c2
-rw-r--r--net/bridge/br_stp_if.c9
-rw-r--r--net/caif/caif_socket.c4
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c18
-rw-r--r--net/core/dst.c3
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/netclassid_cgroup.c28
-rw-r--r--net/core/netprio_cgroup.c9
-rw-r--r--net/core/rtnetlink.c274
-rw-r--r--net/core/scm.c2
-rw-r--r--net/core/skbuff.c6
-rw-r--r--net/core/sock.c19
-rw-r--r--net/core/stream.c6
-rw-r--r--net/dccp/ipv6.c37
-rw-r--r--net/dccp/proto.c3
-rw-r--r--net/decnet/af_decnet.c11
-rw-r--r--net/dns_resolver/dns_query.c2
-rw-r--r--net/hsr/hsr_device.c2
-rw-r--r--net/ipv4/af_inet.c3
-rw-r--r--net/ipv4/fib_frontend.c9
-rw-r--r--net/ipv4/fou.c3
-rw-r--r--net/ipv4/igmp.c5
-rw-r--r--net/ipv4/inet_connection_sock.c4
-rw-r--r--net/ipv4/ipip.c3
-rw-r--r--net/ipv4/ipmr.c23
-rw-r--r--net/ipv4/netfilter/Kconfig1
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c2
-rw-r--r--net/ipv4/raw.c15
-rw-r--r--net/ipv4/tcp.c28
-rw-r--r--net/ipv4/tcp_diag.c2
-rw-r--r--net/ipv4/tcp_input.c26
-rw-r--r--net/ipv4/tcp_ipv4.c22
-rw-r--r--net/ipv4/tcp_output.c23
-rw-r--r--net/ipv4/tcp_timer.c14
-rw-r--r--net/ipv4/udp.c8
-rw-r--r--net/ipv4/xfrm4_policy.c46
-rw-r--r--net/ipv6/addrconf.c21
-rw-r--r--net/ipv6/addrlabel.c2
-rw-r--r--net/ipv6/af_inet6.c18
-rw-r--r--net/ipv6/datagram.c4
-rw-r--r--net/ipv6/exthdrs.c3
-rw-r--r--net/ipv6/icmp.c14
-rw-r--r--net/ipv6/inet6_connection_sock.c21
-rw-r--r--net/ipv6/ip6_gre.c8
-rw-r--r--net/ipv6/ip6_tunnel.c2
-rw-r--r--net/ipv6/ip6mr.c19
-rw-r--r--net/ipv6/ipv6_sockglue.c33
-rw-r--r--net/ipv6/mcast.c2
-rw-r--r--net/ipv6/ndisc.c14
-rw-r--r--net/ipv6/netfilter/Kconfig1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c5
-rw-r--r--net/ipv6/raw.c8
-rw-r--r--net/ipv6/reassembly.c10
-rw-r--r--net/ipv6/route.c24
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/tcp_ipv6.c54
-rw-r--r--net/ipv6/udp.c8
-rw-r--r--net/ipv6/xfrm6_policy.c53
-rw-r--r--net/irda/af_irda.c3
-rw-r--r--net/iucv/af_iucv.c2
-rw-r--r--net/l2tp/l2tp_ip6.c8
-rw-r--r--net/mac80211/agg-tx.c3
-rw-r--r--net/mac80211/cfg.c11
-rw-r--r--net/mac80211/ieee80211_i.h4
-rw-r--r--net/mac80211/iface.c5
-rw-r--r--net/mac80211/main.c3
-rw-r--r--net/mac80211/mesh_pathtbl.c8
-rw-r--r--net/mac80211/mlme.c17
-rw-r--r--net/mac80211/rx.c3
-rw-r--r--net/mac80211/scan.c9
-rw-r--r--net/mac80211/util.c113
-rw-r--r--net/mac80211/vht.c10
-rw-r--r--net/mpls/af_mpls.c43
-rw-r--r--net/mpls/mpls_iptunnel.c4
-rw-r--r--net/netfilter/Kconfig6
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h17
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c14
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c64
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c18
-rw-r--r--net/netfilter/ipset/ip_set_core.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h26
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c16
-rw-r--r--net/netfilter/nf_tables_api.c99
-rw-r--r--net/netfilter/nf_tables_netdev.c2
-rw-r--r--net/netfilter/nfnetlink.c4
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nfnetlink_queue.c9
-rw-r--r--net/netfilter/nft_counter.c49
-rw-r--r--net/netfilter/nft_ct.c1
-rw-r--r--net/netfilter/nft_dynset.c5
-rw-r--r--net/nfc/llcp_sock.c2
-rw-r--r--net/openvswitch/conntrack.c22
-rw-r--r--net/openvswitch/dp_notify.c2
-rw-r--r--net/openvswitch/flow_netlink.c5
-rw-r--r--net/openvswitch/vport-geneve.c1
-rw-r--r--net/openvswitch/vport-gre.c1
-rw-r--r--net/openvswitch/vport-netdev.c8
-rw-r--r--net/openvswitch/vport.c8
-rw-r--r--net/openvswitch/vport.h8
-rw-r--r--net/packet/af_packet.c96
-rw-r--r--net/rds/connection.c6
-rw-r--r--net/rds/send.c4
-rw-r--r--net/rfkill/core.c6
-rw-r--r--net/rxrpc/ar-ack.c4
-rw-r--r--net/rxrpc/ar-output.c2
-rw-r--r--net/sched/sch_api.c25
-rw-r--r--net/sched/sch_generic.c6
-rw-r--r--net/sched/sch_mq.c4
-rw-r--r--net/sched/sch_mqprio.c4
-rw-r--r--net/sctp/auth.c4
-rw-r--r--net/sctp/ipv6.c24
-rw-r--r--net/sctp/outqueue.c2
-rw-r--r--net/sctp/sm_make_chunk.c4
-rw-r--r--net/sctp/sm_statefuns.c9
-rw-r--r--net/sctp/socket.c65
-rw-r--r--net/socket.c23
-rw-r--r--net/sunrpc/sched.c6
-rw-r--r--net/sunrpc/svc.c13
-rw-r--r--net/sunrpc/xprtsock.c14
-rw-r--r--net/tipc/link.c2
-rw-r--r--net/tipc/socket.c10
-rw-r--r--net/tipc/udp_media.c7
-rw-r--r--net/unix/af_unix.c371
-rw-r--r--net/wireless/nl80211.c5
-rw-r--r--net/wireless/reg.c5
-rw-r--r--net/xfrm/xfrm_policy.c88
-rw-r--r--samples/bpf/Makefile7
-rwxr-xr-xscripts/kernel-doc2
-rwxr-xr-xscripts/link-vmlinux.sh2
-rw-r--r--scripts/recordmcount.c137
-rw-r--r--security/keys/encrypted-keys/encrypted.c2
-rw-r--r--security/keys/keyctl.c18
-rw-r--r--security/keys/trusted.c5
-rw-r--r--security/keys/user_defined.c5
-rw-r--r--security/selinux/ss/conditional.c4
-rw-r--r--security/smack/smack_lsm.c2
-rw-r--r--sound/firewire/dice/dice.c4
-rw-r--r--sound/pci/hda/hda_intel.c64
-rw-r--r--sound/pci/hda/patch_ca0132.c3
-rw-r--r--sound/pci/hda/patch_conexant.c5
-rw-r--r--sound/pci/hda/patch_hdmi.c9
-rw-r--r--sound/pci/hda/patch_realtek.c197
-rw-r--r--sound/pci/hda/patch_sigmatel.c45
-rw-r--r--sound/pci/rme96.c41
-rw-r--r--sound/soc/codecs/arizona.c18
-rw-r--r--sound/soc/codecs/es8328.c41
-rw-r--r--sound/soc/codecs/es8328.h1
-rw-r--r--sound/soc/codecs/nau8825.c31
-rw-r--r--sound/soc/codecs/rl6231.c6
-rw-r--r--sound/soc/codecs/rt5645.c65
-rw-r--r--sound/soc/codecs/rt5645.h4
-rw-r--r--sound/soc/codecs/rt5670.h12
-rw-r--r--sound/soc/codecs/rt5677.c100
-rw-r--r--sound/soc/codecs/sgtl5000.c1
-rw-r--r--sound/soc/codecs/wm8960.c2
-rw-r--r--sound/soc/codecs/wm8962.c4
-rw-r--r--sound/soc/codecs/wm8974.c1
-rw-r--r--sound/soc/davinci/davinci-mcasp.c16
-rw-r--r--sound/soc/fsl/Kconfig2
-rw-r--r--sound/soc/fsl/fsl_sai.c21
-rw-r--r--sound/soc/intel/Kconfig2
-rw-r--r--sound/soc/intel/skylake/skl-topology.c2
-rw-r--r--sound/soc/intel/skylake/skl.c4
-rw-r--r--sound/soc/intel/skylake/skl.h2
-rw-r--r--sound/soc/rockchip/rockchip_spdif.c8
-rw-r--r--sound/soc/rockchip/rockchip_spdif.h8
-rw-r--r--sound/soc/sh/rcar/gen.c2
-rw-r--r--sound/soc/sh/rcar/src.c7
-rw-r--r--sound/soc/soc-core.c6
-rw-r--r--sound/soc/soc-dapm.c7
-rw-r--r--sound/soc/soc-ops.c2
-rw-r--r--sound/soc/soc-topology.c3
-rw-r--r--sound/soc/sti/uniperif_player.c9
-rw-r--r--sound/soc/sti/uniperif_reader.c3
-rw-r--r--sound/soc/sunxi/sun4i-codec.c27
-rw-r--r--sound/usb/midi.c46
-rw-r--r--sound/usb/mixer.c2
-rw-r--r--sound/usb/mixer_maps.c12
-rw-r--r--sound/usb/mixer_quirks.c37
-rw-r--r--sound/usb/mixer_quirks.h4
-rw-r--r--sound/usb/quirks-table.h11
-rw-r--r--sound/usb/quirks.c2
-rw-r--r--sound/usb/usbaudio.h1
-rw-r--r--tools/Makefile13
-rw-r--r--tools/build/Makefile2
-rw-r--r--tools/build/Makefile.feature44
-rw-r--r--tools/build/Makefile.include2
-rw-r--r--tools/build/feature/Makefile93
-rw-r--r--tools/include/linux/bitmap.h (renamed from tools/perf/util/include/linux/bitmap.h)2
-rw-r--r--tools/include/linux/string.h15
-rw-r--r--tools/lib/bitmap.c (renamed from tools/perf/util/bitmap.c)0
-rw-r--r--tools/lib/bpf/Makefile14
-rw-r--r--tools/lib/bpf/bpf.c14
-rw-r--r--tools/lib/bpf/bpf.h2
-rw-r--r--tools/lib/bpf/libbpf.c412
-rw-r--r--tools/lib/bpf/libbpf.h88
-rw-r--r--tools/lib/find_bit.c84
-rw-r--r--tools/lib/string.c89
-rw-r--r--tools/lib/subcmd/Build7
-rw-r--r--tools/lib/subcmd/Makefile48
-rw-r--r--tools/lib/subcmd/exec-cmd.c209
-rw-r--r--tools/lib/subcmd/exec-cmd.h16
-rw-r--r--tools/lib/subcmd/help.c (renamed from tools/perf/util/help.c)179
-rw-r--r--tools/lib/subcmd/help.h (renamed from tools/perf/util/help.h)13
-rw-r--r--tools/lib/subcmd/pager.c (renamed from tools/perf/util/pager.c)23
-rw-r--r--tools/lib/subcmd/pager.h9
-rw-r--r--tools/lib/subcmd/parse-options.c (renamed from tools/perf/util/parse-options.c)250
-rw-r--r--tools/lib/subcmd/parse-options.h (renamed from tools/perf/util/parse-options.h)26
-rw-r--r--tools/lib/subcmd/run-command.c (renamed from tools/perf/util/run-command.c)24
-rw-r--r--tools/lib/subcmd/run-command.h (renamed from tools/perf/util/run-command.h)12
-rw-r--r--tools/lib/subcmd/sigchain.c (renamed from tools/perf/util/sigchain.c)3
-rw-r--r--tools/lib/subcmd/sigchain.h (renamed from tools/perf/util/sigchain.h)6
-rw-r--r--tools/lib/subcmd/subcmd-config.c11
-rw-r--r--tools/lib/subcmd/subcmd-config.h14
-rw-r--r--tools/lib/subcmd/subcmd-util.h91
-rw-r--r--tools/lib/traceevent/event-parse.c134
-rw-r--r--tools/lib/traceevent/event-parse.h4
-rw-r--r--tools/lib/util/find_next_bit.c89
-rw-r--r--tools/net/Makefile7
-rw-r--r--tools/perf/Build8
-rw-r--r--tools/perf/Documentation/perf-config.txt103
-rw-r--r--tools/perf/Documentation/perf-evlist.txt3
-rw-r--r--tools/perf/Documentation/perf-record.txt24
-rw-r--r--tools/perf/Documentation/perf-report.txt41
-rw-r--r--tools/perf/Documentation/perf-stat.txt34
-rw-r--r--tools/perf/Documentation/perf-top.txt3
-rw-r--r--tools/perf/Documentation/tips.txt14
-rw-r--r--tools/perf/MANIFEST7
-rw-r--r--tools/perf/Makefile.perf44
-rw-r--r--tools/perf/arch/x86/include/arch-tests.h8
-rw-r--r--tools/perf/arch/x86/tests/insn-x86.c2
-rw-r--r--tools/perf/arch/x86/tests/intel-cqm.c4
-rw-r--r--tools/perf/arch/x86/tests/perf-time-to-tsc.c3
-rw-r--r--tools/perf/arch/x86/tests/rdpmc.c2
-rw-r--r--tools/perf/arch/x86/util/Build1
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c4
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c6
-rw-r--r--tools/perf/bench/futex-hash.c2
-rw-r--r--tools/perf/bench/futex-lock-pi.c2
-rw-r--r--tools/perf/bench/futex-requeue.c2
-rw-r--r--tools/perf/bench/futex-wake-parallel.c2
-rw-r--r--tools/perf/bench/futex-wake.c2
-rw-r--r--tools/perf/bench/mem-functions.c2
-rw-r--r--tools/perf/bench/numa.c2
-rw-r--r--tools/perf/bench/sched-messaging.c2
-rw-r--r--tools/perf/bench/sched-pipe.c2
-rw-r--r--tools/perf/builtin-annotate.c44
-rw-r--r--tools/perf/builtin-bench.c2
-rw-r--r--tools/perf/builtin-buildid-cache.c2
-rw-r--r--tools/perf/builtin-buildid-list.c4
-rw-r--r--tools/perf/builtin-config.c66
-rw-r--r--tools/perf/builtin-data.c2
-rw-r--r--tools/perf/builtin-diff.c15
-rw-r--r--tools/perf/builtin-evlist.c13
-rw-r--r--tools/perf/builtin-help.c10
-rw-r--r--tools/perf/builtin-inject.c3
-rw-r--r--tools/perf/builtin-kmem.c2
-rw-r--r--tools/perf/builtin-kvm.c5
-rw-r--r--tools/perf/builtin-list.c2
-rw-r--r--tools/perf/builtin-lock.c2
-rw-r--r--tools/perf/builtin-mem.c2
-rw-r--r--tools/perf/builtin-probe.c17
-rw-r--r--tools/perf/builtin-record.c48
-rw-r--r--tools/perf/builtin-report.c58
-rw-r--r--tools/perf/builtin-sched.c2
-rw-r--r--tools/perf/builtin-script.c245
-rw-r--r--tools/perf/builtin-stat.c679
-rw-r--r--tools/perf/builtin-timechart.c2
-rw-r--r--tools/perf/builtin-top.c75
-rw-r--r--tools/perf/builtin-trace.c4
-rw-r--r--tools/perf/builtin-version.c10
-rw-r--r--tools/perf/builtin.h1
-rw-r--r--tools/perf/command-list.txt3
-rw-r--r--tools/perf/config/Makefile24
-rw-r--r--tools/perf/config/utilities.mak19
-rw-r--r--tools/perf/perf.c24
-rw-r--r--tools/perf/scripts/python/stat-cpi.py77
-rw-r--r--tools/perf/tests/.gitignore1
-rw-r--r--tools/perf/tests/Build16
-rw-r--r--tools/perf/tests/attr.c6
-rw-r--r--tools/perf/tests/bp_signal.c2
-rw-r--r--tools/perf/tests/bp_signal_overflow.c2
-rw-r--r--tools/perf/tests/bpf-script-test-prologue.c35
-rw-r--r--tools/perf/tests/bpf.c93
-rw-r--r--tools/perf/tests/builtin-test.c141
-rw-r--r--tools/perf/tests/code-reading.c16
-rw-r--r--tools/perf/tests/cpumap.c88
-rw-r--r--tools/perf/tests/dso-data.c6
-rw-r--r--tools/perf/tests/dwarf-unwind.c37
-rw-r--r--tools/perf/tests/event_update.c117
-rw-r--r--tools/perf/tests/evsel-roundtrip-name.c5
-rw-r--r--tools/perf/tests/evsel-tp-sched.c2
-rw-r--r--tools/perf/tests/fdarray.c4
-rw-r--r--tools/perf/tests/hists_common.c6
-rw-r--r--tools/perf/tests/hists_cumulate.c10
-rw-r--r--tools/perf/tests/hists_filter.c4
-rw-r--r--tools/perf/tests/hists_link.c10
-rw-r--r--tools/perf/tests/hists_output.c12
-rw-r--r--tools/perf/tests/keep-tracking.c5
-rw-r--r--tools/perf/tests/kmod-path.c2
-rw-r--r--tools/perf/tests/llvm.c75
-rw-r--r--tools/perf/tests/llvm.h2
-rw-r--r--tools/perf/tests/make3
-rw-r--r--tools/perf/tests/mmap-basic.c2
-rw-r--r--tools/perf/tests/mmap-thread-lookup.c8
-rw-r--r--tools/perf/tests/openat-syscall-all-cpus.c2
-rw-r--r--tools/perf/tests/openat-syscall-tp-fields.c2
-rw-r--r--tools/perf/tests/openat-syscall.c2
-rw-r--r--tools/perf/tests/parse-events.c2
-rw-r--r--tools/perf/tests/parse-no-sample-id-all.c2
-rw-r--r--tools/perf/tests/perf-record.c8
-rw-r--r--tools/perf/tests/pmu.c2
-rw-r--r--tools/perf/tests/python-use.c3
-rw-r--r--tools/perf/tests/sample-parsing.c2
-rw-r--r--tools/perf/tests/stat.c111
-rw-r--r--tools/perf/tests/sw-clock.c2
-rw-r--r--tools/perf/tests/switch-tracking.c8
-rw-r--r--tools/perf/tests/task-exit.c2
-rw-r--r--tools/perf/tests/tests.h95
-rw-r--r--tools/perf/tests/thread-map.c45
-rw-r--r--tools/perf/tests/thread-mg-share.c2
-rw-r--r--tools/perf/tests/topology.c2
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c2
-rw-r--r--tools/perf/ui/browser.c2
-rw-r--r--tools/perf/ui/browsers/hists.c342
-rw-r--r--tools/perf/ui/gtk/hists.c152
-rw-r--r--tools/perf/ui/hist.c14
-rw-r--r--tools/perf/ui/stdio/hist.c100
-rw-r--r--tools/perf/util/Build28
-rw-r--r--tools/perf/util/annotate.c23
-rw-r--r--tools/perf/util/auxtrace.c2
-rw-r--r--tools/perf/util/bpf-loader.c433
-rw-r--r--tools/perf/util/bpf-loader.h4
-rw-r--r--tools/perf/util/bpf-prologue.c455
-rw-r--r--tools/perf/util/bpf-prologue.h34
-rw-r--r--tools/perf/util/build-id.c3
-rw-r--r--tools/perf/util/cache.h14
-rw-r--r--tools/perf/util/callchain.c164
-rw-r--r--tools/perf/util/callchain.h30
-rw-r--r--tools/perf/util/cgroup.c2
-rw-r--r--tools/perf/util/color.c2
-rw-r--r--tools/perf/util/config.c2
-rw-r--r--tools/perf/util/cpumap.c51
-rw-r--r--tools/perf/util/cpumap.h1
-rw-r--r--tools/perf/util/data-convert-bt.c2
-rw-r--r--tools/perf/util/dso.c19
-rw-r--r--tools/perf/util/dso.h1
-rw-r--r--tools/perf/util/env.c9
-rw-r--r--tools/perf/util/environment.c8
-rw-r--r--tools/perf/util/event.c308
-rw-r--r--tools/perf/util/event.h150
-rw-r--r--tools/perf/util/evlist.c112
-rw-r--r--tools/perf/util/evlist.h10
-rw-r--r--tools/perf/util/evsel.c53
-rw-r--r--tools/perf/util/evsel.h4
-rw-r--r--tools/perf/util/exec_cmd.c148
-rw-r--r--tools/perf/util/exec_cmd.h12
-rwxr-xr-xtools/perf/util/generate-cmdlist.sh15
-rw-r--r--tools/perf/util/header.c207
-rw-r--r--tools/perf/util/header.h17
-rw-r--r--tools/perf/util/help-unknown-cmd.c103
-rw-r--r--tools/perf/util/help-unknown-cmd.h0
-rw-r--r--tools/perf/util/hist.c118
-rw-r--r--tools/perf/util/hist.h24
-rw-r--r--tools/perf/util/include/linux/string.h3
-rw-r--r--tools/perf/util/intel-pt.c4
-rw-r--r--tools/perf/util/machine.c75
-rw-r--r--tools/perf/util/map.c7
-rw-r--r--tools/perf/util/parse-branch-options.c2
-rw-r--r--tools/perf/util/parse-events.c10
-rw-r--r--tools/perf/util/parse-regs-options.c2
-rw-r--r--tools/perf/util/path.c18
-rw-r--r--tools/perf/util/pmu.c1
-rw-r--r--tools/perf/util/probe-event.c7
-rw-r--r--tools/perf/util/probe-finder.c30
-rw-r--r--tools/perf/util/python-ext-sources2
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c115
-rw-r--r--tools/perf/util/session.c200
-rw-r--r--tools/perf/util/session.h2
-rw-r--r--tools/perf/util/sort.c601
-rw-r--r--tools/perf/util/sort.h14
-rw-r--r--tools/perf/util/stat.c62
-rw-r--r--tools/perf/util/stat.h10
-rw-r--r--tools/perf/util/string.c16
-rw-r--r--tools/perf/util/symbol-elf.c9
-rw-r--r--tools/perf/util/symbol.c100
-rw-r--r--tools/perf/util/symbol.h5
-rw-r--r--tools/perf/util/term.c35
-rw-r--r--tools/perf/util/term.h10
-rw-r--r--tools/perf/util/thread.c10
-rw-r--r--tools/perf/util/thread_map.c28
-rw-r--r--tools/perf/util/thread_map.h3
-rw-r--r--tools/perf/util/tool.h8
-rw-r--r--tools/perf/util/trace-event.h4
-rw-r--r--tools/perf/util/unwind-libdw.c63
-rw-r--r--tools/perf/util/unwind-libdw.h2
-rw-r--r--tools/perf/util/unwind-libunwind.c80
-rw-r--r--tools/perf/util/util.c67
-rw-r--r--tools/perf/util/util.h20
-rw-r--r--tools/power/x86/turbostat/turbostat.c8
-rw-r--r--tools/testing/nvdimm/test/nfit.c49
-rw-r--r--tools/testing/selftests/futex/README2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh9
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh22
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh41
-rw-r--r--tools/testing/selftests/rcutorture/doc/TINY_RCU.txt1
-rw-r--r--tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt4
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c11
-rw-r--r--tools/testing/selftests/timers/clocksource-switch.c2
-rw-r--r--tools/virtio/linux/kernel.h6
-rw-r--r--tools/virtio/linux/virtio.h6
-rw-r--r--tools/virtio/linux/virtio_config.h20
-rw-r--r--tools/vm/page-types.c1
-rw-r--r--virt/kvm/arm/arch_timer.c28
-rw-r--r--virt/kvm/arm/vgic.c50
2040 files changed, 41151 insertions, 18900 deletions
diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt
index 31d1d658827f..c0d8788e75d3 100644
--- a/Documentation/IPMI.txt
+++ b/Documentation/IPMI.txt
@@ -587,7 +587,7 @@ used to control it:
modprobe ipmi_watchdog timeout=<t> pretimeout=<t> action=<action type>
preaction=<preaction type> preop=<preop type> start_now=x
- nowayout=x ifnum_to_use=n
+ nowayout=x ifnum_to_use=n panic_wdt_timeout=<t>
ifnum_to_use specifies which interface the watchdog timer should use.
The default is -1, which means to pick the first one registered.
@@ -597,7 +597,9 @@ is the amount of seconds before the reset that the pre-timeout panic will
occur (if pretimeout is zero, then pretimeout will not be enabled). Note
that the pretimeout is the time before the final timeout. So if the
timeout is 50 seconds and the pretimeout is 10 seconds, then the pretimeout
-will occur in 40 second (10 seconds before the timeout).
+will occur in 40 seconds (10 seconds before the timeout). The panic_wdt_timeout
+is the timeout value that is set on a kernel panic, in order to allow actions
+such as kdump to take place during the panic.
The action may be "reset", "power_cycle", or "power_off", and
specifies what to do when the timer times out, and defaults to
@@ -634,6 +636,7 @@ for configuring the watchdog:
ipmi_watchdog.preop=<preop type>
ipmi_watchdog.start_now=x
ipmi_watchdog.nowayout=x
+ ipmi_watchdog.panic_wdt_timeout=<t>
The options are the same as the module parameter options.
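As an illustrative sketch (the parameter values here are hypothetical, not
taken from the patch), the new panic_wdt_timeout parameter combines with the
existing module parameters like this:

  modprobe ipmi_watchdog timeout=50 pretimeout=10 action=reset
	  panic_wdt_timeout=120

or, when the driver is built into the kernel, on the kernel command line:

  ipmi_watchdog.panic_wdt_timeout=120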
diff --git a/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png b/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png
new file mode 100644
index 000000000000..7496a55e4e7b
--- /dev/null
+++ b/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png
Binary files differ
diff --git a/Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg b/Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg
new file mode 100644
index 000000000000..4b4014fda770
--- /dev/null
+++ b/Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg
@@ -0,0 +1,374 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="447.99197"
+ height="428.19299"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.3.1 r9886"
+ sodipodi:docname="GPpartitionReaders1.svg">
+ <defs
+ id="defs4">
+ <marker
+ inkscape:stockid="Arrow2Lend"
+ orient="auto"
+ refY="0"
+ refX="0"
+ id="Arrow2Lend"
+ style="overflow:visible">
+ <path
+ id="path3792"
+ style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
+ d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+ transform="matrix(-1.1,0,0,-1.1,-1.1,0)"
+ inkscape:connector-curvature="0" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow2Lstart"
+ orient="auto"
+ refY="0"
+ refX="0"
+ id="Arrow2Lstart"
+ style="overflow:visible">
+ <path
+ id="path3789"
+ style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
+ d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+ transform="matrix(1.1,0,0,1.1,1.1,0)"
+ inkscape:connector-curvature="0" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ id="base"
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1.0"
+ inkscape:pageopacity="0.0"
+ inkscape:pageshadow="2"
+ inkscape:zoom="1.6184291"
+ inkscape:cx="223.99599"
+ inkscape:cy="214.0965"
+ inkscape:document-units="px"
+ inkscape:current-layer="layer1"
+ showgrid="false"
+ inkscape:window-width="979"
+ inkscape:window-height="836"
+ inkscape:window-x="571"
+ inkscape:window-y="335"
+ inkscape:window-maximized="0"
+ fit-margin-top="5"
+ fit-margin-left="5"
+ fit-margin-right="5"
+ fit-margin-bottom="5" />
+ <metadata
+ id="metadata7">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <g
+ inkscape:label="Layer 1"
+ inkscape:groupmode="layer"
+ id="layer1"
+ transform="translate(-28.441125,-185.60612)">
+ <flowRoot
+ xml:space="preserve"
+ id="flowRoot2985"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"><flowRegion
+ id="flowRegion2987"><rect
+ id="rect2989"
+ width="82.85714"
+ height="11.428572"
+ x="240"
+ y="492.36218" /></flowRegion><flowPara
+ id="flowPara2991"></flowPara></flowRoot> <g
+ id="g4433"
+ transform="translate(2,0)">
+ <text
+ sodipodi:linespacing="125%"
+ id="text2993"
+ y="-261.66608"
+ x="412.12299"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ xml:space="preserve"
+ transform="matrix(0,1,-1,0,0,0)"><tspan
+ y="-261.66608"
+ x="412.12299"
+ id="tspan2995"
+ sodipodi:role="line">synchronize_rcu()</tspan></text>
+ <g
+ id="g4417"
+ transform="matrix(0,1,-1,0,730.90257,222.4928)">
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-start:url(#Arrow2Lstart);marker-end:url(#Arrow2Lend)"
+ d="m 97.580736,477.4048 183.140664,0"
+ id="path2997"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 96.752718,465.38398 0,22.62742"
+ id="path4397"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 281.54942,465.38397 0,22.62742"
+ id="path4397-5"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ </g>
+ </g>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.04738"
+ y="268.18076"
+ id="text4429"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431"
+ x="112.04738"
+ y="268.18076">WRITE_ONCE(a, 1);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.04738"
+ y="439.13766"
+ id="text4441"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4443"
+ x="112.04738"
+ y="439.13766">WRITE_ONCE(b, 1);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="255.60869"
+ y="309.29346"
+ id="text4445"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4447"
+ x="255.60869"
+ y="309.29346">r1 = READ_ONCE(a);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="255.14423"
+ y="520.61786"
+ id="text4449"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4451"
+ x="255.14423"
+ y="520.61786">WRITE_ONCE(c, 1);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="384.71124"
+ id="text4453"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4455"
+ x="396.10254"
+ y="384.71124">r2 = READ_ONCE(b);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="582.13617"
+ id="text4457"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4459"
+ x="396.10254"
+ y="582.13617">r3 = READ_ONCE(c);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.08231"
+ y="213.91006"
+ id="text4461"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463"
+ x="112.08231"
+ y="213.91006">thread0()</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="252.34512"
+ y="213.91006"
+ id="text4461-6"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463-0"
+ x="252.34512"
+ y="213.91006">thread1()</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.42557"
+ y="213.91006"
+ id="text4461-2"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463-2"
+ x="396.42557"
+ y="213.91006">thread2()</tspan></text>
+ <rect
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="rect4495"
+ width="436.28488"
+ height="416.4859"
+ x="34.648232"
+ y="191.10612" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ d="m 183.14066,191.10612 0,417.193 -0.70711,0"
+ id="path4497"
+ inkscape:connector-curvature="0" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ d="m 325.13867,191.10612 0,417.193 -0.70711,0"
+ id="path4497-5"
+ inkscape:connector-curvature="0" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="111.75929"
+ y="251.53981"
+ id="text4429-8"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9"
+ x="111.75929"
+ y="251.53981">rcu_read_lock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="367.91556"
+ id="text4429-8-9"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4"
+ x="396.10254"
+ y="367.91556">rcu_read_lock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="597.40289"
+ id="text4429-8-9-3"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4-4"
+ x="396.10254"
+ y="597.40289">rcu_read_unlock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="111.75929"
+ y="453.15311"
+ id="text4429-8-9-3-1"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4-4-6"
+ x="111.75929"
+ y="453.15311">rcu_read_unlock();</tspan></text>
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 33.941125,227.87568 436.284885,0 0,0.7071"
+ id="path4608"
+ inkscape:connector-curvature="0" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="394.94427"
+ y="345.66351"
+ id="text4648"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650"
+ x="394.94427"
+ y="345.66351">QS</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(36.441125,199.60612)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.11968"
+ y="475.77856"
+ id="text4648-4"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-4"
+ x="112.11968"
+ y="475.77856">QS</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-7"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(-246.38346,329.72117)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <path
+ sodipodi:type="arc"
+ style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-7-7"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(-103.65246,202.90878)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="254.85066"
+ y="348.96619"
+ id="text4648-4-3"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-4-5"
+ x="254.85066"
+ y="348.96619">QS</tspan></text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Requirements/RCUApplicability.svg b/Documentation/RCU/Design/Requirements/RCUApplicability.svg
new file mode 100644
index 000000000000..ebcbeee391ed
--- /dev/null
+++ b/Documentation/RCU/Design/Requirements/RCUApplicability.svg
@@ -0,0 +1,237 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5d -->
+
+<!-- CreationDate: Tue Mar 4 18:34:25 2014 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="1089.1382"
+ height="668.21368"
+ viewBox="-2121 -36 14554.634 8876.4061"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.3.1 r9886"
+ sodipodi:docname="RCUApplicability.svg">
+ <metadata
+ id="metadata40">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title />
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs38" />
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="849"
+ inkscape:window-height="639"
+ id="namedview36"
+ showgrid="false"
+ inkscape:zoom="0.51326165"
+ inkscape:cx="544.56912"
+ inkscape:cy="334.10686"
+ inkscape:window-x="149"
+ inkscape:window-y="448"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4"
+ fit-margin-top="5"
+ fit-margin-left="5"
+ fit-margin-right="5"
+ fit-margin-bottom="5" />
+ <g
+ style="fill:none;stroke-width:0.025in"
+ id="g4"
+ transform="translate(-2043.6828,14.791398)">
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="0"
+ width="14400"
+ height="8775"
+ rx="0"
+ style="fill:#ffa1a1;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="1350"
+ y="0"
+ width="11700"
+ height="6075"
+ rx="0"
+ style="fill:#ffff00;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="2700"
+ y="0"
+ width="9000"
+ height="4275"
+ rx="0"
+ style="fill:#00ff00;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="4050"
+ y="0"
+ width="6300"
+ height="2475"
+ rx="0"
+ style="fill:#87cfff;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
+ id="rect12" />
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="900"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text14"
+ sodipodi:linespacing="125%"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ id="tspan3017">Read-Mostly, Stale &amp;</tspan></text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="1350"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text16"
+ sodipodi:linespacing="125%"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ id="tspan3019">Inconsistent Data OK</tspan></text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="1800"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text18"
+ sodipodi:linespacing="125%"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ id="tspan3021">(RCU Works Great!!!)</tspan></text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="3825"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text20"
+ sodipodi:linespacing="125%"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ id="tspan3023">(RCU Works Well)</tspan></text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="3375"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text22"
+ sodipodi:linespacing="125%"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ id="tspan3025">Read-Mostly, Need Consistent Data</tspan></text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="5175"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text24"
+ sodipodi:linespacing="125%"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ id="tspan3027">Read-Write, Need Consistent Data</tspan></text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="6975"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text26"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ sodipodi:linespacing="125%">Update-Mostly, Need Consistent Data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="5625"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text28"
+ sodipodi:linespacing="125%"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ id="tspan3029">(RCU Might Be OK...)</tspan></text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="7875"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text30"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ sodipodi:linespacing="125%">(1) Provide Existence Guarantees For Update-Friendly Mechanisms</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="8325"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text32"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ sodipodi:linespacing="125%">(2) Provide Wait-Free Read-Side Primitives for Real-Time Use)</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="7425"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text34"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ sodipodi:linespacing="125%">(RCU is Very Unlikely to be the Right Tool For The Job, But it Can:</text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg b/Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg
new file mode 100644
index 000000000000..48cd1623d4d4
--- /dev/null
+++ b/Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg
@@ -0,0 +1,639 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="735.25"
+ height="516.21875"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.3.1 r9886"
+ sodipodi:docname="ReadersPartitionGP1.svg">
+ <defs
+ id="defs4">
+ <marker
+ inkscape:stockid="Arrow2Lend"
+ orient="auto"
+ refY="0"
+ refX="0"
+ id="Arrow2Lend"
+ style="overflow:visible">
+ <path
+ id="path3792"
+ style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
+ d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+ transform="matrix(-1.1,0,0,-1.1,-1.1,0)"
+ inkscape:connector-curvature="0" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow2Lstart"
+ orient="auto"
+ refY="0"
+ refX="0"
+ id="Arrow2Lstart"
+ style="overflow:visible">
+ <path
+ id="path3789"
+ style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
+ d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+ transform="matrix(1.1,0,0,1.1,1.1,0)"
+ inkscape:connector-curvature="0" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow2Lstart"
+ orient="auto"
+ refY="0"
+ refX="0"
+ id="Arrow2Lstart-4"
+ style="overflow:visible">
+ <path
+ id="path3789-9"
+ style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
+ d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+ transform="matrix(1.1,0,0,1.1,1.1,0)"
+ inkscape:connector-curvature="0" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow2Lend"
+ orient="auto"
+ refY="0"
+ refX="0"
+ id="Arrow2Lend-4"
+ style="overflow:visible">
+ <path
+ id="path3792-4"
+ style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
+ d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+ transform="matrix(-1.1,0,0,-1.1,-1.1,0)"
+ inkscape:connector-curvature="0" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ id="base"
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1.0"
+ inkscape:pageopacity="0.0"
+ inkscape:pageshadow="2"
+ inkscape:zoom="1.3670394"
+ inkscape:cx="367.26465"
+ inkscape:cy="258.46182"
+ inkscape:document-units="px"
+ inkscape:current-layer="g4433-6"
+ showgrid="false"
+ inkscape:window-width="1351"
+ inkscape:window-height="836"
+ inkscape:window-x="438"
+ inkscape:window-y="335"
+ inkscape:window-maximized="0"
+ fit-margin-top="5"
+ fit-margin-left="5"
+ fit-margin-right="5"
+ fit-margin-bottom="5" />
+ <metadata
+ id="metadata7">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title />
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <g
+ inkscape:label="Layer 1"
+ inkscape:groupmode="layer"
+ id="layer1"
+ transform="translate(-29.15625,-185.59375)">
+ <flowRoot
+ xml:space="preserve"
+ id="flowRoot2985"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"><flowRegion
+ id="flowRegion2987"><rect
+ id="rect2989"
+ width="82.85714"
+ height="11.428572"
+ x="240"
+ y="492.36218" /></flowRegion><flowPara
+ id="flowPara2991" /></flowRoot> <g
+ id="g4433"
+ transform="translate(2,-12)">
+ <text
+ sodipodi:linespacing="125%"
+ id="text2993"
+ y="-261.66608"
+ x="436.12299"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ xml:space="preserve"
+ transform="matrix(0,1,-1,0,0,0)"><tspan
+ y="-261.66608"
+ x="436.12299"
+ id="tspan2995"
+ sodipodi:role="line">synchronize_rcu()</tspan></text>
+ <g
+ id="g4417"
+ transform="matrix(0,1,-1,0,730.90257,222.4928)">
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-start:url(#Arrow2Lstart);marker-end:url(#Arrow2Lend)"
+ d="M 97.580736,477.4048 327.57913,476.09759"
+ id="path2997"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 96.752718,465.38398 0,22.62742"
+ id="path4397"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 328.40703,465.38397 0,22.62742"
+ id="path4397-5"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ </g>
+ </g>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.04738"
+ y="268.18076"
+ id="text4429"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431"
+ x="112.04738"
+ y="268.18076">WRITE_ONCE(a, 1);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.04738"
+ y="487.13766"
+ id="text4441"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4443"
+ x="112.04738"
+ y="487.13766">WRITE_ONCE(b, 1);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="255.60869"
+ y="297.29346"
+ id="text4445"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4447"
+ x="255.60869"
+ y="297.29346">r1 = READ_ONCE(a);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="255.14423"
+ y="554.61786"
+ id="text4449"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4451"
+ x="255.14423"
+ y="554.61786">WRITE_ONCE(c, 1);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="370.71124"
+ id="text4453"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4455"
+ x="396.10254"
+ y="370.71124">WRITE_ONCE(d, 1);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="572.13617"
+ id="text4457"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4459"
+ x="396.10254"
+ y="572.13617">r2 = READ_ONCE(c);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.08231"
+ y="213.91006"
+ id="text4461"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463"
+ x="112.08231"
+ y="213.91006">thread0()</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="252.34512"
+ y="213.91006"
+ id="text4461-6"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463-0"
+ x="252.34512"
+ y="213.91006">thread1()</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.42557"
+ y="213.91006"
+ id="text4461-2"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463-2"
+ x="396.42557"
+ y="213.91006">thread2()</tspan></text>
+ <rect
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="rect4495"
+ width="724.25244"
+ height="505.21201"
+ x="34.648232"
+ y="191.10612" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ d="m 183.14066,191.10612 0,504.24243"
+ id="path4497"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ d="m 325.13867,191.10612 0,504.24243"
+ id="path4497-5"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="111.75929"
+ y="251.53981"
+ id="text4429-8"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9"
+ x="111.75929"
+ y="251.53981">rcu_read_lock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="353.91556"
+ id="text4429-8-9"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4"
+ x="396.10254"
+ y="353.91556">rcu_read_lock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="587.40289"
+ id="text4429-8-9-3"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4-4"
+ x="396.10254"
+ y="587.40289">rcu_read_unlock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="111.75929"
+ y="501.15311"
+ id="text4429-8-9-3-1"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4-4-6"
+ x="111.75929"
+ y="501.15311">rcu_read_unlock();</tspan></text>
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 33.941125,227.87568 724.941765,0"
+ id="path4608"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="394.94427"
+ y="331.66351"
+ id="text4648"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650"
+ x="394.94427"
+ y="331.66351">QS</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(36.441125,185.60612)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.11968"
+ y="523.77856"
+ id="text4648-4"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-4"
+ x="112.11968"
+ y="523.77856">QS</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-7"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(-246.38346,377.72117)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <path
+ sodipodi:type="arc"
+ style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-7-7"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(-103.65246,190.90878)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="254.85066"
+ y="336.96619"
+ id="text4648-4-3"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-4-5"
+ x="254.85066"
+ y="336.96619">QS</tspan></text>
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ d="m 470.93311,190.39903 0,504.24243"
+ id="path4497-5-6"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ d="m 616.22755,190.38323 0,504.24243"
+ id="path4497-5-2"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <g
+ id="g4433-6"
+ transform="translate(288.0964,78.32827)">
+ <text
+ sodipodi:linespacing="125%"
+ id="text2993-7"
+ y="-261.66608"
+ x="440.12299"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ xml:space="preserve"
+ transform="matrix(0,1,-1,0,0,0)"><tspan
+ y="-261.66608"
+ x="440.12299"
+ id="tspan2995-1"
+ sodipodi:role="line">synchronize_rcu()</tspan></text>
+ <g
+ id="g4417-1"
+ transform="matrix(0,1,-1,0,730.90257,222.4928)">
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-start:url(#Arrow2Lstart);marker-end:url(#Arrow2Lend)"
+ d="M 97.580736,477.4048 328.5624,477.07246"
+ id="path2997-2"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 96.752718,465.38398 0,22.62742"
+ id="path4397-3"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 329.39039,465.38397 0,22.62742"
+ id="path4397-5-4"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ </g>
+ </g>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="541.70508"
+ y="387.6217"
+ id="text4445-0"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4447-5"
+ x="541.70508"
+ y="387.6217">r3 = READ_ONCE(d);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="541.2406"
+ y="646.94611"
+ id="text4449-6"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4451-6"
+ x="541.2406"
+ y="646.94611">WRITE_ONCE(e, 1);</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-7-7-5"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(182.44393,281.23704)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="540.94702"
+ y="427.29443"
+ id="text4648-4-3-1"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-4-5-7"
+ x="540.94702"
+ y="427.29443">QS</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="686.27747"
+ y="461.83929"
+ id="text4453-7"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4455-1"
+ x="686.27747"
+ y="461.83929">r4 = READ_ONCE(b);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="686.27747"
+ y="669.26422"
+ id="text4457-9"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4459-2"
+ x="686.27747"
+ y="669.26422">r5 = READ_ONCE(e);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="686.27747"
+ y="445.04358"
+ id="text4429-8-9-33"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4-2"
+ x="686.27747"
+ y="445.04358">rcu_read_lock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="686.27747"
+ y="684.53094"
+ id="text4429-8-9-3-8"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4-4-5"
+ x="686.27747"
+ y="684.53094">rcu_read_unlock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="685.11914"
+ y="422.79153"
+ id="text4648-9"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-7"
+ x="685.11914"
+ y="422.79153">QS</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-8"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(326.61602,276.73415)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="397.85934"
+ y="609.59003"
+ id="text4648-5"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-77"
+ x="397.85934"
+ y="609.59003">QS</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-80"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(39.356201,463.53264)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="256.75986"
+ y="586.99133"
+ id="text4648-5-2"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-77-7"
+ x="256.75986"
+ y="586.99133">QS</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-80-5"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(-101.74328,440.93395)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="546.22791"
+ y="213.91006"
+ id="text4461-2-5"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463-2-6"
+ x="546.22791"
+ y="213.91006">thread3()</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="684.00067"
+ y="213.91006"
+ id="text4461-2-1"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463-2-0"
+ x="684.00067"
+ y="213.91006">thread4()</tspan></text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
new file mode 100644
index 000000000000..a725f9900ec8
--- /dev/null
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -0,0 +1,2897 @@
+<!-- DO NOT HAND EDIT. -->
+<!-- Instead, edit Documentation/RCU/Design/Requirements/Requirements.htmlx and run 'sh htmlqqz.sh Documentation/RCU/Design/Requirements/Requirements' -->
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+ "http://www.w3.org/TR/html4/loose.dtd">
+ <html>
+ <head><title>A Tour Through RCU's Requirements [LWN.net]</title>
+ <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+
+<h1>A Tour Through RCU's Requirements</h1>
+
+<p>Copyright IBM Corporation, 2015</p>
+<p>Author: Paul E.&nbsp;McKenney</p>
+<p><i>The initial version of this document appeared in the
+<a href="https://lwn.net/">LWN</a> articles
+<a href="https://lwn.net/Articles/652156/">here</a>,
+<a href="https://lwn.net/Articles/652677/">here</a>, and
+<a href="https://lwn.net/Articles/653326/">here</a>.</i></p>
+
+<h2>Introduction</h2>
+
+<p>
+Read-copy update (RCU) is a synchronization mechanism that is often
+used as a replacement for reader-writer locking.
+RCU is unusual in that updaters do not block readers,
+which means that RCU's read-side primitives can be exceedingly fast
+and scalable.
+In addition, updaters can make useful forward progress concurrently
+with readers.
+However, all this concurrency between RCU readers and updaters does raise
+the question of exactly what RCU readers are doing, which in turn
+raises the question of exactly what RCU's requirements are.
+
+<p>
+This document therefore summarizes RCU's requirements, and can be thought
+of as an informal, high-level specification for RCU.
+It is important to understand that RCU's specification is primarily
+empirical in nature;
+in fact, I learned about many of these requirements the hard way.
+This situation might cause some consternation.
+However, not only has this learning process been a lot of fun, but
+it has also been a great privilege to work with so many people
+willing to apply technologies in interesting new ways.
+
+<p>
+All that aside, here are the categories of currently known RCU requirements:
+</p>
+
+<ol>
+<li> <a href="#Fundamental Requirements">
+ Fundamental Requirements</a>
+<li> <a href="#Fundamental Non-Requirements">Fundamental Non-Requirements</a>
+<li> <a href="#Parallelism Facts of Life">
+ Parallelism Facts of Life</a>
+<li> <a href="#Quality-of-Implementation Requirements">
+ Quality-of-Implementation Requirements</a>
+<li> <a href="#Linux Kernel Complications">
+ Linux Kernel Complications</a>
+<li> <a href="#Software-Engineering Requirements">
+ Software-Engineering Requirements</a>
+<li> <a href="#Other RCU Flavors">
+ Other RCU Flavors</a>
+<li> <a href="#Possible Future Changes">
+ Possible Future Changes</a>
+</ol>
+
+<p>
+This is followed by a <a href="#Summary">summary</a>,
+which is in turn followed by the inevitable
+<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
+
+<h2><a name="Fundamental Requirements">Fundamental Requirements</a></h2>
+
+<p>
+RCU's fundamental requirements are the closest thing RCU has to hard
+mathematical requirements.
+These are:
+
+<ol>
+<li> <a href="#Grace-Period Guarantee">
+ Grace-Period Guarantee</a>
+<li> <a href="#Publish-Subscribe Guarantee">
+ Publish-Subscribe Guarantee</a>
+<li> <a href="#Memory-Barrier Guarantees">
+ Memory-Barrier Guarantees</a>
+<li> <a href="#RCU Primitives Guaranteed to Execute Unconditionally">
+ RCU Primitives Guaranteed to Execute Unconditionally</a>
+<li> <a href="#Guaranteed Read-to-Write Upgrade">
+ Guaranteed Read-to-Write Upgrade</a>
+</ol>
+
+<h3><a name="Grace-Period Guarantee">Grace-Period Guarantee</a></h3>
+
+<p>
+RCU's grace-period guarantee is unusual in being premeditated:
+Jack Slingwine and I had this guarantee firmly in mind when we started
+work on RCU (then called &ldquo;rclock&rdquo;) in the early 1990s.
+That said, the past two decades of experience with RCU have produced
+a much more detailed understanding of this guarantee.
+
+<p>
+RCU's grace-period guarantee allows updaters to wait for the completion
+of all pre-existing RCU read-side critical sections.
+An RCU read-side critical section
+begins with the marker <tt>rcu_read_lock()</tt> and ends with
+the marker <tt>rcu_read_unlock()</tt>.
+These markers may be nested, and RCU treats a nested set as one
+big RCU read-side critical section.
+Production-quality implementations of <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> are extremely lightweight, and in
+fact have exactly zero overhead in Linux kernels built for production
+use with <tt>CONFIG_PREEMPT=n</tt>.
+
+<p>
+This guarantee allows ordering to be enforced with extremely low
+overhead to readers, for example:
+
+<blockquote>
+<pre>
+ 1 int x, y;
+ 2
+ 3 void thread0(void)
+ 4 {
+ 5 rcu_read_lock();
+ 6 r1 = READ_ONCE(x);
+ 7 r2 = READ_ONCE(y);
+ 8 rcu_read_unlock();
+ 9 }
+10
+11 void thread1(void)
+12 {
+13 WRITE_ONCE(x, 1);
+14 synchronize_rcu();
+15 WRITE_ONCE(y, 1);
+16 }
+</pre>
+</blockquote>
+
+<p>
+Because the <tt>synchronize_rcu()</tt> on line&nbsp;14 waits for
+all pre-existing readers, any instance of <tt>thread0()</tt> that
+loads a value of zero from <tt>x</tt> must complete before
+<tt>thread1()</tt> stores to <tt>y</tt>, so that instance must
+also load a value of zero from <tt>y</tt>.
+Similarly, any instance of <tt>thread0()</tt> that loads a value of
+one from <tt>y</tt> must have started after the
+<tt>synchronize_rcu()</tt> started, and must therefore also load
+a value of one from <tt>x</tt>.
+Therefore, the outcome:
+<blockquote>
+<pre>
+(r1 == 0 &amp;&amp; r2 == 1)
+</pre>
+</blockquote>
+cannot happen.
+
+<p><a name="Quick Quiz 1"><b>Quick Quiz 1</b>:</a>
+Wait a minute!
+You said that updaters can make useful forward progress concurrently
+with readers, but pre-existing readers will block
+<tt>synchronize_rcu()</tt>!!!
+Just who are you trying to fool???
+<br><a href="#qq1answer">Answer</a>
+
+<p>
+This scenario resembles one of the first uses of RCU in
+<a href="https://en.wikipedia.org/wiki/DYNIX">DYNIX/ptx</a>,
+which managed a distributed lock manager's transition into
+a state suitable for handling recovery from node failure,
+more or less as follows:
+
+<blockquote>
+<pre>
+ 1 #define STATE_NORMAL 0
+ 2 #define STATE_WANT_RECOVERY 1
+ 3 #define STATE_RECOVERING 2
+ 4 #define STATE_WANT_NORMAL 3
+ 5
+ 6 int state = STATE_NORMAL;
+ 7
+ 8 void do_something_dlm(void)
+ 9 {
+10 int state_snap;
+11
+12 rcu_read_lock();
+13 state_snap = READ_ONCE(state);
+14 if (state_snap == STATE_NORMAL)
+15 do_something();
+16 else
+17 do_something_carefully();
+18 rcu_read_unlock();
+19 }
+20
+21 void start_recovery(void)
+22 {
+23 WRITE_ONCE(state, STATE_WANT_RECOVERY);
+24 synchronize_rcu();
+25 WRITE_ONCE(state, STATE_RECOVERING);
+26 recovery();
+27 WRITE_ONCE(state, STATE_WANT_NORMAL);
+28 synchronize_rcu();
+29 WRITE_ONCE(state, STATE_NORMAL);
+30 }
+</pre>
+</blockquote>
+
+<p>
+The RCU read-side critical section in <tt>do_something_dlm()</tt>
+works with the <tt>synchronize_rcu()</tt> in <tt>start_recovery()</tt>
+to guarantee that <tt>do_something()</tt> never runs concurrently
+with <tt>recovery()</tt>, but with little or no synchronization
+overhead in <tt>do_something_dlm()</tt>.
+
+<p><a name="Quick Quiz 2"><b>Quick Quiz 2</b>:</a>
+Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
+<br><a href="#qq2answer">Answer</a>
+
+<p>
+In order to avoid fatal problems such as deadlocks,
+an RCU read-side critical section must not contain calls to
+<tt>synchronize_rcu()</tt>.
+Similarly, an RCU read-side critical section must not
+contain anything that waits, directly or indirectly, on completion of
+an invocation of <tt>synchronize_rcu()</tt>.
+
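+<p>
+For example, the following (hypothetical) function illustrates the
+sort of self-deadlock that this rule prohibits:
+
+<blockquote>
+<pre>
+ 1 void buggy_reader(void)
+ 2 {
+ 3   rcu_read_lock();
+ 4   do_something();
+ 5   synchronize_rcu(); /* BUG: waits for all pre-existing */
+ 6                      /* readers, including this one. */
+ 7   rcu_read_unlock();
+ 8 }
+</pre>
+</blockquote>
+
+<p>
+The <tt>synchronize_rcu()</tt> on line&nbsp;5 cannot return until the
+enclosing RCU read-side critical section completes, and that critical
+section cannot complete until <tt>synchronize_rcu()</tt> returns.
+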
+<p>
+Although RCU's grace-period guarantee is useful in and of itself, with
+<a href="https://lwn.net/Articles/573497/">quite a few use cases</a>,
+it would be good to be able to use RCU to coordinate read-side
+access to linked data structures.
+For this, the grace-period guarantee is not sufficient, as can
+be seen in function <tt>add_gp_buggy()</tt> below.
+We will look at the reader's code later, but in the meantime, just think of
+the reader as locklessly picking up the <tt>gp</tt> pointer,
+and, if the value loaded is non-<tt>NULL</tt>, locklessly accessing the
+<tt>-&gt;a</tt> and <tt>-&gt;b</tt> fields.
+
+<blockquote>
+<pre>
+ 1 bool add_gp_buggy(int a, int b)
+ 2 {
+ 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4 if (!p)
+ 5 return false;
+ 6 spin_lock(&amp;gp_lock);
+ 7 if (rcu_access_pointer(gp)) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+11 p-&gt;a = a;
+12 p-&gt;b = b;
+13 gp = p; /* ORDERING BUG */
+14 spin_unlock(&amp;gp_lock);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+The problem is that both the compiler and weakly ordered CPUs are within
+their rights to reorder this code as follows:
+
+<blockquote>
+<pre>
+ 1 bool add_gp_buggy_optimized(int a, int b)
+ 2 {
+ 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4 if (!p)
+ 5 return false;
+ 6 spin_lock(&amp;gp_lock);
+ 7 if (rcu_access_pointer(gp)) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+<b>11 gp = p; /* ORDERING BUG */
+12 p-&gt;a = a;
+13 p-&gt;b = b;</b>
+14 spin_unlock(&amp;gp_lock);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+If an RCU reader fetches <tt>gp</tt> just after
+<tt>add_gp_buggy_optimized</tt> executes line&nbsp;11,
+it will see garbage in the <tt>-&gt;a</tt> and <tt>-&gt;b</tt>
+fields.
+And this is but one of many ways in which compiler and hardware optimizations
+could cause trouble.
+Therefore, we clearly need some way to prevent the compiler and the CPU from
+reordering in this manner, which brings us to the publish-subscribe
+guarantee discussed in the next section.
+
+<h3><a name="Publish-Subscribe Guarantee">Publish/Subscribe Guarantee</a></h3>
+
+<p>
+RCU's publish-subscribe guarantee allows data to be inserted
+into a linked data structure without disrupting RCU readers.
+The updater uses <tt>rcu_assign_pointer()</tt> to insert the
+new data, and readers use <tt>rcu_dereference()</tt> to
+access data, whether new or old.
+The following shows an example of insertion:
+
+<blockquote>
+<pre>
+ 1 bool add_gp(int a, int b)
+ 2 {
+ 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4 if (!p)
+ 5 return false;
+ 6 spin_lock(&amp;gp_lock);
+ 7 if (rcu_access_pointer(gp)) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+11 p-&gt;a = a;
+12 p-&gt;b = b;
+13 rcu_assign_pointer(gp, p);
+14 spin_unlock(&amp;gp_lock);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+The <tt>rcu_assign_pointer()</tt> on line&nbsp;13 is conceptually
+equivalent to a simple assignment statement, but also guarantees
+that its assignment will
+happen after the two assignments in lines&nbsp;11 and&nbsp;12,
+similar to the C11 <tt>memory_order_release</tt> store operation.
+It also prevents any number of &ldquo;interesting&rdquo; compiler
+optimizations, for example, the use of <tt>gp</tt> as a scratch
+location immediately preceding the assignment.
+
+<p><a name="Quick Quiz 3"><b>Quick Quiz 3</b>:</a>
+But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
+two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
+from being reordered.
+Can't that also cause problems?
+<br><a href="#qq3answer">Answer</a>
+
+<p>
+It is tempting to assume that the reader need not do anything special
+to control its accesses to the RCU-protected data,
+as shown in <tt>do_something_gp_buggy()</tt> below:
+
+<blockquote>
+<pre>
+ 1 bool do_something_gp_buggy(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 p = gp; /* OPTIMIZATIONS GALORE!!! */
+ 5 if (p) {
+ 6 do_something(p-&gt;a, p-&gt;b);
+ 7 rcu_read_unlock();
+ 8 return true;
+ 9 }
+10 rcu_read_unlock();
+11 return false;
+12 }
+</pre>
+</blockquote>
+
+<p>
+However, this temptation must be resisted because there are a
+surprisingly large number of ways that the compiler
+(to say nothing of
+<a href="https://h71000.www7.hp.com/wizard/wiz_2637.html">DEC Alpha CPUs</a>)
+can trip this code up.
+For but one example, if the compiler were short of registers, it
+might choose to refetch from <tt>gp</tt> rather than keeping
+a separate copy in <tt>p</tt> as follows:
+
+<blockquote>
+<pre>
+ 1 bool do_something_gp_buggy_optimized(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 if (gp) { /* OPTIMIZATIONS GALORE!!! */
+<b> 5 do_something(gp-&gt;a, gp-&gt;b);</b>
+ 6 rcu_read_unlock();
+ 7 return true;
+ 8 }
+ 9 rcu_read_unlock();
+10 return false;
+11 }
+</pre>
+</blockquote>
+
+<p>
+If this function ran concurrently with a series of updates that
+replaced the current structure with a new one,
+the fetches of <tt>gp-&gt;a</tt>
+and <tt>gp-&gt;b</tt> might well come from two different structures,
+which could cause serious confusion.
+To prevent this (and much else besides), <tt>do_something_gp()</tt> uses
+<tt>rcu_dereference()</tt> to fetch from <tt>gp</tt>:
+
+<blockquote>
+<pre>
+ 1 bool do_something_gp(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 p = rcu_dereference(gp);
+ 5 if (p) {
+ 6 do_something(p-&gt;a, p-&gt;b);
+ 7 rcu_read_unlock();
+ 8 return true;
+ 9 }
+10 rcu_read_unlock();
+11 return false;
+12 }
+</pre>
+</blockquote>
+
+<p>
+The <tt>rcu_dereference()</tt> uses volatile casts and (for DEC Alpha)
+memory barriers in the Linux kernel.
+Should a
+<a href="http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf">high-quality implementation of C11 <tt>memory_order_consume</tt> [PDF]</a>
+ever appear, then <tt>rcu_dereference()</tt> could be implemented
+as a <tt>memory_order_consume</tt> load.
+Regardless of the exact implementation, a pointer fetched by
+<tt>rcu_dereference()</tt> may not be used outside of the
+outermost RCU read-side critical section containing that
+<tt>rcu_dereference()</tt>, unless protection of
+the corresponding data element has been passed from RCU to some
+other synchronization mechanism, most commonly locking or
+<a href="https://www.kernel.org/doc/Documentation/RCU/rcuref.txt">reference counting</a>.
+
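+<p>
+Leaving aside the debugging and static-analysis machinery found in the
+Linux kernel's actual implementation, a greatly simplified sketch of
+<tt>rcu_dereference()</tt> might look as follows:
+
+<blockquote>
+<pre>
+ 1 #define rcu_dereference(p) \
+ 2 ({ \
+ 3   typeof(p) _p1 = READ_ONCE(p); \
+ 4   smp_read_barrier_depends(); /* No-op except on DEC Alpha. */ \
+ 5   _p1; \
+ 6 })
+</pre>
+</blockquote>
+
+<p>
+The volatile semantics of <tt>READ_ONCE()</tt> prevent the compiler
+from refetching the pointer, and <tt>smp_read_barrier_depends()</tt>
+supplies the memory barrier needed by DEC Alpha.
+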
+<p>
+In short, updaters use <tt>rcu_assign_pointer()</tt> and readers
+use <tt>rcu_dereference()</tt>, and these two RCU API elements
+work together to ensure that readers have a consistent view of
+newly added data elements.
+
+<p>
+Of course, it is also necessary to remove elements from RCU-protected
+data structures, for example, using the following process:
+
+<ol>
+<li> Remove the data element from the enclosing structure.
+<li> Wait for all pre-existing RCU read-side critical sections
+ to complete (because only pre-existing readers can possibly have
+ a reference to the newly removed data element).
+<li> At this point, only the updater has a reference to the
+ newly removed data element, so it can safely reclaim
+ the data element, for example, by passing it to <tt>kfree()</tt>.
+</ol>
+
+This process is implemented by <tt>remove_gp_synchronous()</tt>:
+
+<blockquote>
+<pre>
+ 1 bool remove_gp_synchronous(void)
+ 2 {
+ 3 struct foo *p;
+ 4
+ 5 spin_lock(&amp;gp_lock);
+ 6 p = rcu_access_pointer(gp);
+ 7 if (!p) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+11 rcu_assign_pointer(gp, NULL);
+12 spin_unlock(&amp;gp_lock);
+13 synchronize_rcu();
+14 kfree(p);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+This function is straightforward, with line&nbsp;13 waiting for a grace
+period before line&nbsp;14 frees the old data element.
+This waiting ensures that readers will reach line&nbsp;7 of
+<tt>do_something_gp()</tt> before the data element referenced by
+<tt>p</tt> is freed.
+The <tt>rcu_access_pointer()</tt> on line&nbsp;6 is similar to
+<tt>rcu_dereference()</tt>, except that:
+
+<ol>
+<li> The value returned by <tt>rcu_access_pointer()</tt>
+ cannot be dereferenced.
+ If you want to access the value pointed to as well as
+ the pointer itself, use <tt>rcu_dereference()</tt>
+ instead of <tt>rcu_access_pointer()</tt>.
+<li> The call to <tt>rcu_access_pointer()</tt> need not be
+ protected.
+ In contrast, <tt>rcu_dereference()</tt> must either be
+ within an RCU read-side critical section or in a code
+ segment where the pointer cannot change, for example, in
+ code protected by the corresponding update-side lock.
+</ol>
+
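+<p>
+The following (hypothetical) fragments illustrate this distinction:
+
+<blockquote>
+<pre>
+ 1 /* Existence check only: gp is not dereferenced, and */
+ 2 /* gp_lock prevents the pointer from changing. */
+ 3 spin_lock(&amp;gp_lock);
+ 4 if (rcu_access_pointer(gp))
+ 5   do_something_locked();
+ 6 spin_unlock(&amp;gp_lock);
+ 7
+ 8 /* Dereference: requires an RCU read-side critical section. */
+ 9 rcu_read_lock();
+10 p = rcu_dereference(gp);
+11 if (p)
+12   do_something(p-&gt;a, p-&gt;b);
+13 rcu_read_unlock();
+</pre>
+</blockquote>
+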
+<p><a name="Quick Quiz 4"><b>Quick Quiz 4</b>:</a>
+Without the <tt>rcu_dereference()</tt> or the
+<tt>rcu_access_pointer()</tt>, what destructive optimizations
+might the compiler make use of?
+<br><a href="#qq4answer">Answer</a>
+
+<p>
+In short, RCU's publish-subscribe guarantee is provided by the combination
+of <tt>rcu_assign_pointer()</tt> and <tt>rcu_dereference()</tt>.
+This guarantee allows data elements to be safely added to RCU-protected
+linked data structures without disrupting RCU readers.
+This guarantee can be used in combination with the grace-period
+guarantee to also allow data elements to be removed from RCU-protected
+linked data structures, again without disrupting RCU readers.
+
+<p>
+This guarantee was only partially premeditated.
+DYNIX/ptx used an explicit memory barrier for publication, but had nothing
+resembling <tt>rcu_dereference()</tt> for subscription, nor did it
+have anything resembling the <tt>smp_read_barrier_depends()</tt>
+that was later subsumed into <tt>rcu_dereference()</tt>.
+The need for these operations made itself known quite suddenly at a
+late-1990s meeting with the DEC Alpha architects, back in the days when
+DEC was still a free-standing company.
+It took the Alpha architects a good hour to convince me that any sort
+of barrier would ever be needed, and it then took me a good <i>two</i> hours
+to convince them that their documentation did not make this point clear.
+More recent work with the C and C++ standards committees has provided
+much education on tricks and traps from the compiler.
+In short, compilers were much less tricky in the early 1990s, but in
+2015, don't even think about omitting <tt>rcu_dereference()</tt>!
+
+<h3><a name="Memory-Barrier Guarantees">Memory-Barrier Guarantees</a></h3>
+
+<p>
+The previous section's simple linked-data-structure scenario clearly
+demonstrates the need for RCU's stringent memory-ordering guarantees on
+systems with more than one CPU:
+
+<ol>
+<li> Each CPU that has an RCU read-side critical section that
+ begins before <tt>synchronize_rcu()</tt> starts is
+ guaranteed to execute a full memory barrier between the time
+ that the RCU read-side critical section ends and the time that
+ <tt>synchronize_rcu()</tt> returns.
+ Without this guarantee, a pre-existing RCU read-side critical section
+ might hold a reference to the newly removed <tt>struct foo</tt>
+ after the <tt>kfree()</tt> on line&nbsp;14 of
+ <tt>remove_gp_synchronous()</tt>.
+<li> Each CPU that has an RCU read-side critical section that ends
+ after <tt>synchronize_rcu()</tt> returns is guaranteed
+ to execute a full memory barrier between the time that
+ <tt>synchronize_rcu()</tt> begins and the time that the RCU
+ read-side critical section begins.
+ Without this guarantee, a later RCU read-side critical section
+ running after the <tt>kfree()</tt> on line&nbsp;14 of
+ <tt>remove_gp_synchronous()</tt> might
+ later run <tt>do_something_gp()</tt> and find the
+ newly deleted <tt>struct foo</tt>.
+<li> If the task invoking <tt>synchronize_rcu()</tt> remains
+ on a given CPU, then that CPU is guaranteed to execute a full
+ memory barrier sometime during the execution of
+ <tt>synchronize_rcu()</tt>.
+ This guarantee ensures that the <tt>kfree()</tt> on
+ line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
+ execute after the removal on line&nbsp;11.
+<li> If the task invoking <tt>synchronize_rcu()</tt> migrates
+ among a group of CPUs during that invocation, then each of the
+ CPUs in that group is guaranteed to execute a full memory barrier
+ sometime during the execution of <tt>synchronize_rcu()</tt>.
+ This guarantee again ensures that the <tt>kfree()</tt> on
+ line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
+ execute after the removal on line&nbsp;11, even in the case
+ where the thread executing the <tt>synchronize_rcu()</tt>
+ migrates in the meantime.
+</ol>
+
+<p><a name="Quick Quiz 5"><b>Quick Quiz 5</b>:</a>
+Given that multiple CPUs can start RCU read-side critical sections
+at any time without any ordering whatsoever, how can RCU possibly tell whether
+or not a given RCU read-side critical section starts before a
+given instance of <tt>synchronize_rcu()</tt>?
+<br><a href="#qq5answer">Answer</a>
+
+<p><a name="Quick Quiz 6"><b>Quick Quiz 6</b>:</a>
+The first and second guarantees require unbelievably strict ordering!
+Are all these memory barriers <i>really</i> required?
+<br><a href="#qq6answer">Answer</a>
+
+<p>
+Note that these memory-barrier requirements do not replace the fundamental
+RCU requirement that a grace period wait for all pre-existing readers.
+On the contrary, the memory barriers called out in this section must operate in
+such a way as to <i>enforce</i> this fundamental requirement.
+Of course, different implementations enforce this requirement in different
+ways, but enforce it they must.
+
+<h3><a name="RCU Primitives Guaranteed to Execute Unconditionally">RCU Primitives Guaranteed to Execute Unconditionally</a></h3>
+
+<p>
+The common-case RCU primitives are unconditional.
+They are invoked, they do their job, and they return, with no possibility
+of error, and no need to retry.
+This is a key RCU design philosophy.
+
+<p>
+However, this philosophy is pragmatic rather than pigheaded.
+If someone comes up with a good justification for a particular conditional
+RCU primitive, it might well be implemented and added.
+After all, this guarantee was reverse-engineered, not premeditated.
+The unconditional nature of the RCU primitives was initially an
+accident of implementation, and later experience with conditional
+synchronization primitives caused me to elevate this accident
+to a guarantee.
+Therefore, the justification for adding a conditional primitive to
+RCU would need to be based on detailed and compelling use cases.
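+
+<p>
+This unconditional nature is visible directly in the API:
+the core primitives return <tt>void</tt>, leaving callers with no
+error codes to check and no retry loops to write.
+The following illustrative declarations (a sketch, not the exact
+kernel prototypes) show the general shape:
+
+<blockquote>
+<pre>
+ 1 void rcu_read_lock(void);
+ 2 void rcu_read_unlock(void);
+ 3 void synchronize_rcu(void);
+ 4 void call_rcu(struct rcu_head *head,
+ 5               void (*func)(struct rcu_head *head));
+</pre>
+</blockquote>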
+
+<h3><a name="Guaranteed Read-to-Write Upgrade">Guaranteed Read-to-Write Upgrade</a></h3>
+
+<p>
+As far as RCU is concerned, it is always possible to carry out an
+update within an RCU read-side critical section.
+For example, that RCU read-side critical section might search for
+a given data element, and then might acquire the update-side
+spinlock in order to update that element, all while remaining
+in that RCU read-side critical section.
+Of course, it is necessary to exit the RCU read-side critical section
+before invoking <tt>synchronize_rcu()</tt>; however, this
+inconvenience can be avoided through use of the
+<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt> API members
+described later in this document.
+
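+<p>
+For example, the following (hypothetical) function searches for the
+element under RCU protection and then acquires the update-side lock
+to carry out the update, all within the RCU read-side critical section:
+
+<blockquote>
+<pre>
+ 1 bool update_gp_a(int new_a)
+ 2 {
+ 3   struct foo *p;
+ 4
+ 5   rcu_read_lock();
+ 6   p = rcu_dereference(gp);
+ 7   if (!p) {
+ 8     rcu_read_unlock();
+ 9     return false;
+10   }
+11   spin_lock(&amp;gp_lock);
+12   p-&gt;a = new_a; /* Safe: p cannot yet be freed. */
+13   spin_unlock(&amp;gp_lock);
+14   rcu_read_unlock();
+15   return true;
+16 }
+</pre>
+</blockquote>
+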
+<p><a name="Quick Quiz 7"><b>Quick Quiz 7</b>:</a>
+But how does the upgrade-to-write operation exclude other readers?
+<br><a href="#qq7answer">Answer</a>
+
+<p>
+This guarantee allows lookup code to be shared between read-side
+and update-side code, and was premeditated, appearing in the earliest
+DYNIX/ptx RCU documentation.
+
+<h2><a name="Fundamental Non-Requirements">Fundamental Non-Requirements</a></h2>
+
+<p>
+RCU provides extremely lightweight readers, and its read-side guarantees,
+though quite useful, are correspondingly lightweight.
+It is therefore all too easy to assume that RCU is guaranteeing more
+than it really is.
+Of course, the list of things that RCU does not guarantee is infinitely
+long; however, the following sections list a few non-guarantees that
+have caused confusion.
+Except where otherwise noted, these non-guarantees were premeditated.
+
+<ol>
+<li> <a href="#Readers Impose Minimal Ordering">
+ Readers Impose Minimal Ordering</a>
+<li> <a href="#Readers Do Not Exclude Updaters">
+ Readers Do Not Exclude Updaters</a>
+<li> <a href="#Updaters Only Wait For Old Readers">
+ Updaters Only Wait For Old Readers</a>
+<li> <a href="#Grace Periods Don't Partition Read-Side Critical Sections">
+ Grace Periods Don't Partition Read-Side Critical Sections</a>
+<li> <a href="#Read-Side Critical Sections Don't Partition Grace Periods">
+ Read-Side Critical Sections Don't Partition Grace Periods</a>
+<li> <a href="#Disabling Preemption Does Not Block Grace Periods">
+ Disabling Preemption Does Not Block Grace Periods</a>
+</ol>
+
+<h3><a name="Readers Impose Minimal Ordering">Readers Impose Minimal Ordering</a></h3>
+
+<p>
+Reader-side markers such as <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> provide absolutely no ordering guarantees
+except through their interaction with the grace-period APIs such as
+<tt>synchronize_rcu()</tt>.
+To see this, consider the following pair of threads:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(x, 1);
+ 5 rcu_read_unlock();
+ 6 rcu_read_lock();
+ 7 WRITE_ONCE(y, 1);
+ 8 rcu_read_unlock();
+ 9 }
+10
+11 void thread1(void)
+12 {
+13 rcu_read_lock();
+14 r1 = READ_ONCE(y);
+15 rcu_read_unlock();
+16 rcu_read_lock();
+17 r2 = READ_ONCE(x);
+18 rcu_read_unlock();
+19 }
+</pre>
+</blockquote>
+
+<p>
+After <tt>thread0()</tt> and <tt>thread1()</tt> execute
+concurrently, it is quite possible to have
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 0)
+</pre>
+</blockquote>
+
+(that is, <tt>y</tt> appears to have been assigned before <tt>x</tt>),
+which would not be possible if <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> had much in the way of ordering
+properties.
+But they do not, so the CPU is within its rights
+to do significant reordering.
+This is by design: Any significant ordering constraints would slow down
+these fast-path APIs.
+
+<p><a name="Quick Quiz 8"><b>Quick Quiz 8</b>:</a>
+Can't the compiler also reorder this code?
+<br><a href="#qq8answer">Answer</a>
+
+<h3><a name="Readers Do Not Exclude Updaters">Readers Do Not Exclude Updaters</a></h3>
+
+<p>
+Neither <tt>rcu_read_lock()</tt> nor <tt>rcu_read_unlock()</tt>
+exclude updates.
+All they do is to prevent grace periods from ending.
+The following example illustrates this:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 r1 = READ_ONCE(y);
+ 5 if (r1) {
+ 6 do_something_with_nonzero_x();
+ 7 r2 = READ_ONCE(x);
+ 8 WARN_ON(!r2); /* BUG!!! */
+ 9 }
+10 rcu_read_unlock();
+11 }
+12
+13 void thread1(void)
+14 {
+15 spin_lock(&amp;my_lock);
+16 WRITE_ONCE(x, 1);
+17 WRITE_ONCE(y, 1);
+18 spin_unlock(&amp;my_lock);
+19 }
+</pre>
+</blockquote>
+
+<p>
+If the <tt>thread0()</tt> function's <tt>rcu_read_lock()</tt>
+excluded the <tt>thread1()</tt> function's update,
+the <tt>WARN_ON()</tt> could never fire.
+But the fact is that <tt>rcu_read_lock()</tt> does not exclude
+much of anything aside from subsequent grace periods, of which
+<tt>thread1()</tt> has none, so the
+<tt>WARN_ON()</tt> can and does fire.
+
+<h3><a name="Updaters Only Wait For Old Readers">Updaters Only Wait For Old Readers</a></h3>
+
+<p>
+It might be tempting to assume that after <tt>synchronize_rcu()</tt>
+completes, there are no readers executing.
+This temptation must be avoided because
+new readers can start immediately after <tt>synchronize_rcu()</tt>
+starts, and <tt>synchronize_rcu()</tt> is under no
+obligation to wait for these new readers.
+
+<p><a name="Quick Quiz 9"><b>Quick Quiz 9</b>:</a>
+Suppose that <tt>synchronize_rcu()</tt> did wait until all readers had completed.
+Would the updater be able to rely on this?
+<br><a href="#qq9answer">Answer</a>
+
+<h3><a name="Grace Periods Don't Partition Read-Side Critical Sections">
+Grace Periods Don't Partition Read-Side Critical Sections</a></h3>
+
+<p>
+It is tempting to assume that if any part of one RCU read-side critical
+section precedes a given grace period, and if any part of another RCU
+read-side critical section follows that same grace period, then all of
+the first RCU read-side critical section must precede all of the second.
+However, this just isn't the case: A single grace period does not
+partition the set of RCU read-side critical sections.
+An example of this situation can be illustrated as follows, where
+<tt>a</tt>, <tt>b</tt>, and <tt>c</tt> are initially all zero:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(a, 1);
+ 5 WRITE_ONCE(b, 1);
+ 6 rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11 r1 = READ_ONCE(a);
+12 synchronize_rcu();
+13 WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18 rcu_read_lock();
+19 r2 = READ_ONCE(b);
+20 r3 = READ_ONCE(c);
+21 rcu_read_unlock();
+22 }
+</pre>
+</blockquote>
+
+<p>
+It turns out that the outcome:
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 0 &amp;&amp; r3 == 1)
+</pre>
+</blockquote>
+
+is entirely possible.
+The following figure shows how this can happen, with each circled
+<tt>QS</tt> indicating the point at which RCU recorded a
+<i>quiescent state</i> for each thread, that is, a state in which
+RCU knows that the thread cannot be in the midst of an RCU read-side
+critical section that started before the current grace period:
+
+<p><img src="GPpartitionReaders1.svg" alt="GPpartitionReaders1.svg" width="60%"></p>
+
+<p>
+If it is necessary to partition RCU read-side critical sections in this
+manner, it is necessary to use two grace periods, where the first
+grace period is known to end before the second grace period starts:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(a, 1);
+ 5 WRITE_ONCE(b, 1);
+ 6 rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11 r1 = READ_ONCE(a);
+12 synchronize_rcu();
+13 WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18 r2 = READ_ONCE(c);
+19 synchronize_rcu();
+20 WRITE_ONCE(d, 1);
+21 }
+22
+23 void thread3(void)
+24 {
+25 rcu_read_lock();
+26 r3 = READ_ONCE(b);
+27 r4 = READ_ONCE(d);
+28 rcu_read_unlock();
+29 }
+</pre>
+</blockquote>
+
+<p>
+Here, if <tt>(r1 == 1)</tt>, then
+<tt>thread0()</tt>'s write to <tt>b</tt> must happen
+before the end of <tt>thread1()</tt>'s grace period.
+If in addition <tt>(r4 == 1)</tt>, then
+<tt>thread3()</tt>'s read from <tt>b</tt> must happen
+after the beginning of <tt>thread2()</tt>'s grace period.
+If it is also the case that <tt>(r2 == 1)</tt>, then the
+end of <tt>thread1()</tt>'s grace period must precede the
+beginning of <tt>thread2()</tt>'s grace period.
+This means that the two RCU read-side critical sections cannot overlap,
+guaranteeing that <tt>(r3 == 1)</tt>.
+As a result, the outcome:
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 0 &amp;&amp; r4 == 1)
+</pre>
+</blockquote>
+
+cannot happen.
+
+<p>
+This non-requirement was also non-premeditated, but became apparent
+when studying RCU's interaction with memory ordering.
+
+<h3><a name="Read-Side Critical Sections Don't Partition Grace Periods">
+Read-Side Critical Sections Don't Partition Grace Periods</a></h3>
+
+<p>
+It is also tempting to assume that if an RCU read-side critical section
+happens between a pair of grace periods, then those grace periods cannot
+overlap.
+However, this temptation leads nowhere good, as can be illustrated by
+the following, with all variables initially zero:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(a, 1);
+ 5 WRITE_ONCE(b, 1);
+ 6 rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11 r1 = READ_ONCE(a);
+12 synchronize_rcu();
+13 WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18 rcu_read_lock();
+19 WRITE_ONCE(d, 1);
+20 r2 = READ_ONCE(c);
+21 rcu_read_unlock();
+22 }
+23
+24 void thread3(void)
+25 {
+26 r3 = READ_ONCE(d);
+27 synchronize_rcu();
+28 WRITE_ONCE(e, 1);
+29 }
+30
+31 void thread4(void)
+32 {
+33 rcu_read_lock();
+34 r4 = READ_ONCE(b);
+35 r5 = READ_ONCE(e);
+36 rcu_read_unlock();
+37 }
+</pre>
+</blockquote>
+
+<p>
+In this case, the outcome:
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 1 &amp;&amp; r4 == 0 &amp;&amp; r5 == 1)
+</pre>
+</blockquote>
+
+is entirely possible, as illustrated below:
+
+<p><img src="ReadersPartitionGP1.svg" alt="ReadersPartitionGP1.svg" width="100%"></p>
+
+<p>
+Again, an RCU read-side critical section can overlap almost all of a
+given grace period, just so long as it does not overlap the entire
+grace period.
+As a result, an RCU read-side critical section cannot partition a pair
+of RCU grace periods.
+
+<p><a name="Quick Quiz 10"><b>Quick Quiz 10</b>:</a>
+How long a sequence of grace periods, each separated by an RCU read-side
+critical section, would be required to partition the RCU read-side
+critical sections at the beginning and end of the chain?
+<br><a href="#qq10answer">Answer</a>
+
+<h3><a name="Disabling Preemption Does Not Block Grace Periods">
+Disabling Preemption Does Not Block Grace Periods</a></h3>
+
+<p>
+There was a time when disabling preemption on any given CPU would block
+subsequent grace periods.
+However, this was an accident of implementation and is not a requirement.
+And in the current Linux-kernel implementation, disabling preemption
+on a given CPU in fact does not block grace periods, as Oleg Nesterov
+<a href="https://lkml.kernel.org/g/20150614193825.GA19582@redhat.com">demonstrated</a>.
+
+<p>
+If you need a preempt-disable region to block grace periods, you need to add
+<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>, for example
+as follows:
+
+<blockquote>
+<pre>
+ 1 preempt_disable();
+ 2 rcu_read_lock();
+ 3 do_something();
+ 4 rcu_read_unlock();
+ 5 preempt_enable();
+ 6
+ 7 /* Spinlocks implicitly disable preemption. */
+ 8 spin_lock(&amp;mylock);
+ 9 rcu_read_lock();
+10 do_something();
+11 rcu_read_unlock();
+12 spin_unlock(&amp;mylock);
+</pre>
+</blockquote>
+
+<p>
+In theory, you could enter the RCU read-side critical section first,
+but it is more efficient to keep the entire RCU read-side critical
+section contained in the preempt-disable region as shown above.
+Of course, RCU read-side critical sections that extend outside of
+preempt-disable regions will work correctly, but such critical sections
+can be preempted, which forces <tt>rcu_read_unlock()</tt> to do
+more work.
+And no, this is <i>not</i> an invitation to enclose all of your RCU
+read-side critical sections within preempt-disable regions, because
+doing so would degrade real-time response.
+
+<p>
+This non-requirement appeared with preemptible RCU.
+If you need a grace period that waits on non-preemptible code regions, use
+<a href="#Sched Flavor">RCU-sched</a>.
+
+<h2><a name="Parallelism Facts of Life">Parallelism Facts of Life</a></h2>
+
+<p>
+These parallelism facts of life are by no means specific to RCU, but
+the RCU implementation must abide by them.
+They therefore bear repeating:
+
+<ol>
+<li> Any CPU or task may be delayed at any time,
+ and any attempts to avoid these delays by disabling
+ preemption, interrupts, or whatever are completely futile.
+ This is most obvious in preemptible user-level
+ environments and in virtualized environments (where
+ a given guest OS's VCPUs can be preempted at any time by
+ the underlying hypervisor), but can also happen in bare-metal
+ environments due to ECC errors, NMIs, and other hardware
+ events.
+ Although a delay of more than about 20 seconds can result
+ in splats, the RCU implementation is obligated to use
+ algorithms that can tolerate extremely long delays, but where
+ &ldquo;extremely long&rdquo; is not long enough to allow
+ wrap-around when incrementing a 64-bit counter.
+<li> Both the compiler and the CPU can reorder memory accesses.
+ Where it matters, RCU must use compiler directives and
+ memory-barrier instructions to preserve ordering.
+<li> Conflicting writes to memory locations in any given cache line
+ will result in expensive cache misses.
+ Greater numbers of concurrent writes and more-frequent
+ concurrent writes will result in more dramatic slowdowns.
+ RCU is therefore obligated to use algorithms that have
+ sufficient locality to avoid significant performance and
+ scalability problems.
+<li> As a rough rule of thumb, only one CPU's worth of processing
+ may be carried out under the protection of any given exclusive
+ lock.
+ RCU must therefore use scalable locking designs.
+<li> Counters are finite, especially on 32-bit systems.
+ RCU's use of counters must therefore tolerate counter wrap,
+ or be designed such that counter wrap would take way more
+ time than a single system is likely to run.
+ An uptime of ten years is quite possible, a runtime
+ of a century much less so.
+ As an example of the latter, RCU's dyntick-idle nesting counter
+ allows 54 bits for interrupt nesting level (this counter
+ is 64 bits even on a 32-bit system).
+ Overflowing this counter requires 2<sup>54</sup>
+ half-interrupts on a given CPU without that CPU ever going idle.
+ If a half-interrupt happened every microsecond, it would take
+ 570 years of runtime to overflow this counter, which is currently
+ believed to be an acceptably long time (see the back-of-envelope
+ check following this list).
+<li> Linux systems can have thousands of CPUs running a single
+ Linux kernel in a single shared-memory environment.
+ RCU must therefore pay close attention to high-end scalability.
+</ol>
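+
+<p>
+As that back-of-envelope check on the dyntick-idle arithmetic, the
+following user-level sketch (not itself part of RCU) computes the
+overflow time:
+
+<blockquote>
+<pre>
+ 1 #include &lt;stdio.h&gt;
+ 2
+ 3 int main(void)
+ 4 {
+ 5   double us = (double)(1ULL &lt;&lt; 54); /* half-interrupts, one per microsecond */
+ 6   double years = us / 1e6 / 3600.0 / 24.0 / 365.25;
+ 7
+ 8   printf("%.0f years\n", years); /* prints 571, matching the roughly */
+ 9   return 0;                      /* 570 years cited above */
+10 }
+</pre>
+</blockquote>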
+
+<p>
+This last parallelism fact of life means that RCU must pay special
+attention to the preceding facts of life.
+The idea that Linux might scale to systems with thousands of CPUs would
+have been met with some skepticism in the 1990s, but these requirements
+would otherwise have been unsurprising, even in the early 1990s.
+
+<h2><a name="Quality-of-Implementation Requirements">Quality-of-Implementation Requirements</a></h2>
+
+<p>
+These sections list quality-of-implementation requirements.
+Although an RCU implementation that ignores these requirements could
+still be used, it would likely be subject to limitations that would
+make it inappropriate for industrial-strength production use.
+Classes of quality-of-implementation requirements are as follows:
+
+<ol>
+<li> <a href="#Specialization">Specialization</a>
+<li> <a href="#Performance and Scalability">Performance and Scalability</a>
+<li> <a href="#Composability">Composability</a>
+<li> <a href="#Corner Cases">Corner Cases</a>
+</ol>
+
+<p>
+Each of these classes is covered in the following sections.
+
+<h3><a name="Specialization">Specialization</a></h3>
+
+<p>
+RCU is and always has been intended primarily for read-mostly situations, as
+illustrated by the following figure.
+This means that RCU's read-side primitives are optimized, often at the
+expense of its update-side primitives.
+
+<p><img src="RCUApplicability.svg" alt="RCUApplicability.svg" width="70%"></p>
+
+<p>
+This focus on read-mostly situations means that RCU must interoperate
+with other synchronization primitives.
+For example, the <tt>add_gp()</tt> and <tt>remove_gp_synchronous()</tt>
+examples discussed earlier use RCU to protect readers and locking to
+coordinate updaters.
+However, the need extends much farther, requiring that a variety of
+synchronization primitives be legal within RCU read-side critical sections,
+including spinlocks, sequence locks, atomic operations, reference
+counters, and memory barriers.
+
+<p><a name="Quick Quiz 11"><b>Quick Quiz 11</b>:</a>
+What about sleeping locks?
+<br><a href="#qq11answer">Answer</a>
+
+<p>
+It often comes as a surprise that many algorithms do not require a
+consistent view of data, but can instead function correctly with an
+inconsistent view, network routing being the poster child.
+Internet routing algorithms take significant time to propagate
+updates, so that by the time an update arrives at a given system,
+that system has been sending network traffic the wrong way for
+a considerable length of time.
+Having a few threads continue to send traffic the wrong way for a
+few more milliseconds is clearly not a problem: In the worst case,
+TCP retransmissions will eventually get the data where it needs to go.
+In general, when tracking the state of the universe outside of the
+computer, some level of inconsistency must be tolerated due to
+speed-of-light delays if nothing else.
+
+<p>
+Furthermore, uncertainty about external state is inherent in many cases.
+For example, a pair of veterinarians might use heartbeat to determine
+whether or not a given cat was alive.
+But how long should they wait after the last heartbeat to decide that
+the cat is in fact dead?
+Waiting less than 400 milliseconds makes no sense because this would
+mean that a relaxed cat would be considered to cycle between death
+and life more than 100 times per minute.
+Moreover, just as with human beings, a cat's heart might stop for
+some period of time, so the exact wait period is a judgment call.
+One of our pair of veterinarians might wait 30 seconds before pronouncing
+the cat dead, while the other might insist on waiting a full minute.
+The two veterinarians would then disagree on the state of the cat during
+the final 30 seconds of the minute following the last heartbeat, as
+fancifully illustrated below:
+
+<p><img src="2013-08-is-it-dead.png" alt="2013-08-is-it-dead.png" width="431"></p>
+
+<p>
+Interestingly enough, this same situation applies to hardware.
+When push comes to shove, how do we tell whether or not some
+external server has failed?
+We send messages to it periodically, and declare it failed if we
+don't receive a response within a given period of time.
+Policy decisions can usually tolerate short
+periods of inconsistency.
+The policy was decided some time ago, and is only now being put into
+effect, so a few milliseconds of delay is normally inconsequential.
+
+<p>
+However, there are algorithms that absolutely must see consistent data.
+For example, the translation from a user-level SystemV semaphore
+ID to the corresponding in-kernel data structure is protected by RCU,
+but it is absolutely forbidden to update a semaphore that has just been
+removed.
+In the Linux kernel, this need for consistency is accommodated by acquiring
+spinlocks located in the in-kernel data structure from within
+the RCU read-side critical section, and this is indicated by the
+green box in the figure above.
+Many other techniques may be used, and are in fact used within the
+Linux kernel.
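+
+<p>
+A minimal sketch of this idiom appears below, in which
+<tt>sem_find()</tt>, the <tt>-&gt;deleted</tt> flag, and
+<tt>do_semaphore_update()</tt> are hypothetical stand-ins for the
+corresponding semaphore-specific code:
+
+<blockquote>
+<pre>
+ 1 rcu_read_lock();
+ 2 sem = sem_find(id); /* hypothetical RCU-protected lookup */
+ 3 if (sem) {
+ 4   spin_lock(&amp;sem-&gt;lock); /* lock embedded in the structure */
+ 5   if (!sem-&gt;deleted) /* flag set by deletion, under the lock */
+ 6     do_semaphore_update(sem);
+ 7   spin_unlock(&amp;sem-&gt;lock);
+ 8 }
+ 9 rcu_read_unlock();
+</pre>
+</blockquote>
+
+<p>
+Because an updater removes the semaphore and sets the
+<tt>-&gt;deleted</tt> flag while holding that same per-structure lock,
+the update on line&nbsp;6 can never be applied to a deleted semaphore.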
+
+<p>
+In short, RCU is not required to maintain consistency, and other
+mechanisms may be used in concert with RCU when consistency is required.
+RCU's specialization allows it to do its job extremely well, and its
+ability to interoperate with other synchronization mechanisms allows
+the right mix of synchronization tools to be used for a given job.
+
+<h3><a name="Performance and Scalability">Performance and Scalability</a></h3>
+
+<p>
+Energy efficiency is a critical component of performance today,
+and Linux-kernel RCU implementations must therefore avoid unnecessarily
+awakening idle CPUs.
+I cannot claim that this requirement was premeditated.
+In fact, I learned of it during a telephone conversation in which I
+was given &ldquo;frank and open&rdquo; feedback on the importance
+of energy efficiency in battery-powered systems and on specific
+energy-efficiency shortcomings of the Linux-kernel RCU implementation.
+In my experience, the battery-powered embedded community will consider
+any unnecessary wakeups to be extremely unfriendly acts, so much
+so that mere Linux-kernel-mailing-list posts are
+insufficient to vent their ire.
+
+<p>
+Memory consumption is not particularly important in most
+situations, and has become decreasingly
+so as memory sizes have expanded and memory
+costs have plummeted.
+However, as I learned from Matt Mackall's
+<a href="http://elinux.org/Linux_Tiny-FAQ">bloatwatch</a>
+efforts, memory footprint is critically important on single-CPU systems with
+non-preemptible (<tt>CONFIG_PREEMPT=n</tt>) kernels, and thus
+<a href="https://lkml.kernel.org/g/20090113221724.GA15307@linux.vnet.ibm.com">tiny RCU</a>
+was born.
+Josh Triplett has since taken over the small-memory banner with his
+<a href="https://tiny.wiki.kernel.org/">Linux kernel tinification</a>
+project, which resulted in
+<a href="#Sleepable RCU">SRCU</a>
+becoming optional for those kernels not needing it.
+
+<p>
+The remaining performance requirements are, for the most part,
+unsurprising.
+For example, in keeping with RCU's read-side specialization,
+<tt>rcu_dereference()</tt> should have negligible overhead (for
+example, suppression of a few minor compiler optimizations).
+Similarly, in non-preemptible environments, <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> should have exactly zero overhead.
+
+<p>
+In preemptible environments, in the case where the RCU read-side
+critical section was not preempted (as will be the case for the
+highest-priority real-time process), <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> should have minimal overhead.
+In particular, they should not contain atomic read-modify-write
+operations, memory-barrier instructions, preemption disabling,
+interrupt disabling, or backwards branches.
+However, in the case where the RCU read-side critical section was preempted,
+<tt>rcu_read_unlock()</tt> may acquire spinlocks and disable interrupts.
+This is why it is better to nest an RCU read-side critical section
+within a preempt-disable region than vice versa, at least in cases
+where that critical section is short enough to avoid unduly degrading
+real-time latencies.
+
+<p>
+The <tt>synchronize_rcu()</tt> grace-period-wait primitive is
+optimized for throughput.
+It may therefore incur several milliseconds of latency in addition to
+the duration of the longest RCU read-side critical section.
+On the other hand, multiple concurrent invocations of
+<tt>synchronize_rcu()</tt> are required to use batching optimizations
+so that they can be satisfied by a single underlying grace-period-wait
+operation.
+For example, in the Linux kernel, it is not unusual for a single
+grace-period-wait operation to serve more than
+<a href="https://www.usenix.org/conference/2004-usenix-annual-technical-conference/making-rcu-safe-deep-sub-millisecond-response">1,000 separate invocations</a>
+of <tt>synchronize_rcu()</tt>, thus amortizing the per-invocation
+overhead down to nearly zero.
+However, the grace-period optimization is also required to avoid
+measurable degradation of real-time scheduling and interrupt latencies.
+
+<p>
+In some cases, the multi-millisecond <tt>synchronize_rcu()</tt>
+latencies are unacceptable.
+In these cases, <tt>synchronize_rcu_expedited()</tt> may be used
+instead, reducing the grace-period latency down to a few tens of
+microseconds on small systems, at least in cases where the RCU read-side
+critical sections are short.
+There are currently no special latency requirements for
+<tt>synchronize_rcu_expedited()</tt> on large systems, but,
+consistent with the empirical nature of the RCU specification,
+that is subject to change.
+However, there most definitely are scalability requirements:
+A storm of <tt>synchronize_rcu_expedited()</tt> invocations on 4096
+CPUs should at least make reasonable forward progress.
+In return for its shorter latencies, <tt>synchronize_rcu_expedited()</tt>
+is permitted to impose modest degradation of real-time latency
+on non-idle online CPUs.
+That said, it will likely be necessary to take further steps to reduce this
+degradation, hopefully to roughly that of a scheduling-clock interrupt.
+
+<p>
+There are a number of situations where even
+<tt>synchronize_rcu_expedited()</tt>'s reduced grace-period
+latency is unacceptable.
+In these situations, the asynchronous <tt>call_rcu()</tt> can be
+used in place of <tt>synchronize_rcu()</tt> as follows:
+
+<blockquote>
+<pre>
+ 1 struct foo {
+ 2   int a;
+ 3   int b;
+ 4   struct rcu_head rh;
+ 5 };
+ 6
+ 7 static void remove_gp_cb(struct rcu_head *rhp)
+ 8 {
+ 9   struct foo *p = container_of(rhp, struct foo, rh);
+10
+11   kfree(p);
+12 }
+13
+14 bool remove_gp_asynchronous(void)
+15 {
+16   struct foo *p;
+17
+18   spin_lock(&amp;gp_lock);
+19   p = rcu_access_pointer(gp);
+20   if (!p) {
+21     spin_unlock(&amp;gp_lock);
+22     return false;
+23   }
+24   rcu_assign_pointer(gp, NULL);
+25   call_rcu(&amp;p-&gt;rh, remove_gp_cb);
+26   spin_unlock(&amp;gp_lock);
+27   return true;
+28 }
+</pre>
+</blockquote>
+
+<p>
+A definition of <tt>struct foo</tt> is finally needed, and appears
+on lines&nbsp;1-5.
+The function <tt>remove_gp_cb()</tt> is passed to <tt>call_rcu()</tt>
+on line&nbsp;25, and will be invoked after the end of a subsequent
+grace period.
+This gets the same effect as <tt>remove_gp_synchronous()</tt>,
+but without forcing the updater to wait for a grace period to elapse.
+The <tt>call_rcu()</tt> function may be used in a number of
+situations where neither <tt>synchronize_rcu()</tt> nor
+<tt>synchronize_rcu_expedited()</tt> would be legal,
+including within preempt-disable code, <tt>local_bh_disable()</tt> code,
+interrupt-disable code, and interrupt handlers.
+However, even <tt>call_rcu()</tt> is illegal within NMI handlers.
+The callback function (<tt>remove_gp_cb()</tt> in this case) will be
+executed within a softirq (software interrupt) environment within the
+Linux kernel,
+either within a real softirq handler or under the protection
+of <tt>local_bh_disable()</tt>.
+In both the Linux kernel and in userspace, it is bad practice to
+write an RCU callback function that takes too long.
+Long-running operations should be relegated to separate threads or
+(in the Linux kernel) workqueues.
+
+<p><a name="Quick Quiz 12"><b>Quick Quiz 12</b>:</a>
+Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
+After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
+structure, which would interact badly with concurrent insertions.
+Doesn't this mean that <tt>rcu_dereference()</tt> is required?
+<br><a href="#qq12answer">Answer</a>
+
+<p>
+However, all that <tt>remove_gp_cb()</tt> is doing is
+invoking <tt>kfree()</tt> on the data element.
+This is a common idiom, and is supported by <tt>kfree_rcu()</tt>,
+which allows &ldquo;fire and forget&rdquo; operation as shown below:
+
+<blockquote>
+<pre>
+ 1 struct foo {
+ 2   int a;
+ 3   int b;
+ 4   struct rcu_head rh;
+ 5 };
+ 6
+ 7 bool remove_gp_faf(void)
+ 8 {
+ 9   struct foo *p;
+10
+11   spin_lock(&amp;gp_lock);
+12   p = rcu_dereference(gp);
+13   if (!p) {
+14     spin_unlock(&amp;gp_lock);
+15     return false;
+16   }
+17   rcu_assign_pointer(gp, NULL);
+18   kfree_rcu(p, rh);
+19   spin_unlock(&amp;gp_lock);
+20   return true;
+21 }
+</pre>
+</blockquote>
+
+<p>
+Note that <tt>remove_gp_faf()</tt> simply invokes
+<tt>kfree_rcu()</tt> and proceeds, without any need to pay any
+further attention to the subsequent grace period and <tt>kfree()</tt>.
+It is permissible to invoke <tt>kfree_rcu()</tt> from the same
+environments as for <tt>call_rcu()</tt>.
+Interestingly enough, DYNIX/ptx had the equivalents of
+<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>, but not
+<tt>synchronize_rcu()</tt>.
+This was due to the fact that RCU was not heavily used within DYNIX/ptx,
+so the very few places that needed something like
+<tt>synchronize_rcu()</tt> simply open-coded it.
+
+<p><a name="Quick Quiz 13"><b>Quick Quiz 13</b>:</a>
+Earlier it was claimed that <tt>call_rcu()</tt> and
+<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
+by readers.
+But how can that be correct, given that the invocation of the callback
+and the freeing of the memory (respectively) must still wait for
+a grace period to elapse?
+<br><a href="#qq13answer">Answer</a>
+
+<p>
+But what if the updater must wait for the completion of code to be
+executed after the end of the grace period, but has other tasks
+that can be carried out in the meantime?
+The polling-style <tt>get_state_synchronize_rcu()</tt> and
+<tt>cond_synchronize_rcu()</tt> functions may be used for this
+purpose, as shown below:
+
+<blockquote>
+<pre>
+ 1 bool remove_gp_poll(void)
+ 2 {
+ 3   struct foo *p;
+ 4   unsigned long s;
+ 5
+ 6   spin_lock(&amp;gp_lock);
+ 7   p = rcu_access_pointer(gp);
+ 8   if (!p) {
+ 9     spin_unlock(&amp;gp_lock);
+10     return false;
+11   }
+12   rcu_assign_pointer(gp, NULL);
+13   spin_unlock(&amp;gp_lock);
+14   s = get_state_synchronize_rcu();
+15   do_something_while_waiting();
+16   cond_synchronize_rcu(s);
+17   kfree(p);
+18   return true;
+19 }
+</pre>
+</blockquote>
+
+<p>
+On line&nbsp;14, <tt>get_state_synchronize_rcu()</tt> obtains a
+&ldquo;cookie&rdquo; from RCU,
+then line&nbsp;15 carries out other tasks,
+and finally, line&nbsp;16 returns immediately if a grace period has
+elapsed in the meantime, but otherwise waits as required.
+The need for <tt>get_state_synchronize_rcu()</tt> and
+<tt>cond_synchronize_rcu()</tt> has appeared quite recently,
+so it is too early to tell whether they will stand the test of time.
+
+<p>
+RCU thus provides a range of tools to allow updaters to strike the
+required tradeoff between latency, flexibility, and CPU overhead.
+
+<h3><a name="Composability">Composability</a></h3>
+
+<p>
+Composability has received much attention in recent years, perhaps in part
+due to the collision of multicore hardware with object-oriented techniques
+designed in single-threaded environments for single-threaded use.
+And in theory, RCU read-side critical sections may be composed, and in
+fact may be nested arbitrarily deeply.
+In practice, as with all real-world implementations of composable
+constructs, there are limitations.
+
+<p>
+Implementations of RCU for which <tt>rcu_read_lock()</tt>
+and <tt>rcu_read_unlock()</tt> generate no code, such as
+Linux-kernel RCU when <tt>CONFIG_PREEMPT=n</tt>, can be
+nested arbitrarily deeply.
+After all, there is no overhead.
+Except that if all these instances of <tt>rcu_read_lock()</tt>
+and <tt>rcu_read_unlock()</tt> are visible to the compiler,
+compilation will eventually fail due to exhausting memory,
+mass storage, or user patience, whichever comes first.
+If the nesting is not visible to the compiler, as is the case with
+mutually recursive functions each in its own translation unit,
+stack overflow will result.
+If the nesting takes the form of loops, either the control variable
+will overflow or (in the Linux kernel) you will get an RCU CPU stall warning.
+Nevertheless, this class of RCU implementations is one
+of the most composable constructs in existence.
+
+<p>
+RCU implementations that explicitly track nesting depth
+are limited by the nesting-depth counter.
+For example, the Linux kernel's preemptible RCU limits nesting to
+<tt>INT_MAX</tt>.
+This should suffice for almost all practical purposes.
+That said, a consecutive pair of RCU read-side critical sections
+between which there is an operation that waits for a grace period
+cannot be enclosed in another RCU read-side critical section.
+This is because it is not legal to wait for a grace period within
+an RCU read-side critical section: To do so would result either
+in deadlock or
+in RCU implicitly splitting the enclosing RCU read-side critical
+section, neither of which is conducive to a long-lived and prosperous
+kernel.
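+
+<p>
+In other words, the following sketch (with the <tt>do_something()</tt>
+calls standing in for arbitrary reader code) illustrates the forbidden
+pattern:
+
+<blockquote>
+<pre>
+ 1 rcu_read_lock();
+ 2 do_something();
+ 3 synchronize_rcu(); /* BUG: deadlock or silently split reader! */
+ 4 do_something_else();
+ 5 rcu_read_unlock();
+</pre>
+</blockquote>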
+
+<p>
+It is worth noting that RCU is not alone in limiting composability.
+For example, many transactional-memory implementations prohibit
+composing a pair of transactions separated by an irrevocable
+operation (for example, a network receive operation).
+For another example, lock-based critical sections can be composed
+surprisingly freely, but only if deadlock is avoided.
+
+<p>
+In short, although RCU read-side critical sections are highly composable,
+care is required in some situations, just as is the case for any other
+composable synchronization mechanism.
+
+<h3><a name="Corner Cases">Corner Cases</a></h3>
+
+<p>
+A given RCU workload might have an endless and intense stream of
+RCU read-side critical sections, perhaps even so intense that there
+was never a point in time during which there was not at least one
+RCU read-side critical section in flight.
+RCU cannot allow this situation to block grace periods: As long as
+all the RCU read-side critical sections are finite, grace periods
+must also be finite.
+
+<p>
+That said, preemptible RCU implementations could potentially result
+in RCU read-side critical sections being preempted for long durations,
+which has the effect of creating a long-duration RCU read-side
+critical section.
+This situation can arise only in heavily loaded systems, but systems using
+real-time priorities are of course more vulnerable.
+Therefore, RCU priority boosting is provided to help deal with this
+case.
+That said, the exact requirements on RCU priority boosting will likely
+evolve as more experience accumulates.
+
+<p>
+Other workloads might have very high update rates.
+Although one can argue that such workloads should instead use
+something other than RCU, the fact remains that RCU must
+handle such workloads gracefully.
+This requirement is another factor driving batching of grace periods,
+but it is also the driving force behind the checks for large numbers
+of queued RCU callbacks in the <tt>call_rcu()</tt> code path.
+Finally, high update rates should not delay RCU read-side critical
+sections, although some read-side delays can occur when using
+<tt>synchronize_rcu_expedited()</tt>, courtesy of this function's use
+of <tt>try_stop_cpus()</tt>.
+(In the future, <tt>synchronize_rcu_expedited()</tt> will be
+converted to use lighter-weight inter-processor interrupts (IPIs),
+but this will still disturb readers, though to a much smaller degree.)
+
+<p>
+Although all three of these corner cases were understood in the early
+1990s, a simple user-level test consisting of <tt>close(open(path))</tt>
+in a tight loop
+in the early 2000s suddenly provided a much deeper appreciation of the
+high-update-rate corner case.
+This test also motivated addition of some RCU code to react to high update
+rates. For example, if a given CPU finds itself with more than 10,000
+RCU callbacks queued, it will cause RCU to take evasive action by
+more aggressively starting grace periods and more aggressively forcing
+completion of grace-period processing.
+This evasive action causes the grace period to complete more quickly,
+but at the cost of restricting RCU's batching optimizations, thus
+increasing the CPU overhead incurred by that grace period.
+
+<h2><a name="Software-Engineering Requirements">
+Software-Engineering Requirements</a></h2>
+
+<p>
+Between Murphy's Law and &ldquo;To err is human&rdquo;, it is necessary to
+guard against mishaps and misuse:
+
+<ol>
+<li> It is all too easy to forget to use <tt>rcu_read_lock()</tt>
+ everywhere that it is needed, so kernels built with
+ <tt>CONFIG_PROVE_RCU=y</tt> will splat if
+ <tt>rcu_dereference()</tt> is used outside of an
+ RCU read-side critical section.
+ Update-side code can use <tt>rcu_dereference_protected()</tt>,
+ which takes a
+ <a href="https://lwn.net/Articles/371986/">lockdep expression</a>
+ to indicate what is providing the protection.
+ If the indicated protection is not provided, a lockdep splat
+ is emitted.
+ (Several of these interfaces appear in the sketch following
+ this list.)
+
+ <p>
+ Code shared between readers and updaters can use
+ <tt>rcu_dereference_check()</tt>, which also takes a
+ lockdep expression, and emits a lockdep splat if neither
+ <tt>rcu_read_lock()</tt> nor the indicated protection
+ is in place.
+ In addition, <tt>rcu_dereference_raw()</tt> is used in those
+ (hopefully rare) cases where the required protection cannot
+ be easily described.
+ Finally, <tt>rcu_read_lock_held()</tt> is provided to
+ allow a function to verify that it has been invoked within
+ an RCU read-side critical section.
+ I was made aware of this set of requirements shortly after Thomas
+ Gleixner audited a number of RCU uses.
+<li> A given function might wish to check for RCU-related preconditions
+ upon entry, before using any other RCU API.
+ The <tt>rcu_lockdep_assert()</tt> macro does this job,
+ asserting the expression in kernels having lockdep enabled
+ and doing nothing otherwise.
+<li> It is also easy to forget to use <tt>rcu_assign_pointer()</tt>
+ and <tt>rcu_dereference()</tt>, perhaps (incorrectly)
+ substituting a simple assignment.
+ To catch this sort of error, a given RCU-protected pointer may be
+ tagged with <tt>__rcu</tt>, after which running sparse
+ with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt> will complain
+ about simple-assignment accesses to that pointer.
+ Arnd Bergmann made me aware of this requirement, and also
+ supplied the needed
+ <a href="https://lwn.net/Articles/376011/">patch series</a>.
+<li> Kernels built with <tt>CONFIG_DEBUG_OBJECTS_RCU_HEAD=y</tt>
+ will splat if a data element is passed to <tt>call_rcu()</tt>
+ twice in a row, without a grace period in between.
+ (This error is similar to a double free.)
+ The corresponding <tt>rcu_head</tt> structures that are
+ dynamically allocated are automatically tracked, but
+ <tt>rcu_head</tt> structures allocated on the stack
+ must be initialized with <tt>init_rcu_head_on_stack()</tt>
+ and cleaned up with <tt>destroy_rcu_head_on_stack()</tt>.
+ Similarly, statically allocated non-stack <tt>rcu_head</tt>
+ structures must be initialized with <tt>init_rcu_head()</tt>
+ and cleaned up with <tt>destroy_rcu_head()</tt>.
+ Mathieu Desnoyers made me aware of this requirement, and also
+ supplied the needed
+ <a href="https://lkml.kernel.org/g/20100319013024.GA28456@Krystal">patch</a>.
+<li> An infinite loop in an RCU read-side critical section will
+ eventually trigger an RCU CPU stall warning splat, with
+ the duration of &ldquo;eventually&rdquo; being controlled by the
+ <tt>RCU_CPU_STALL_TIMEOUT</tt> <tt>Kconfig</tt> option, or,
+ alternatively, by the
+ <tt>rcupdate.rcu_cpu_stall_timeout</tt> boot/sysfs
+ parameter.
+ However, RCU is not obligated to produce this splat
+ unless there is a grace period waiting on that particular
+ RCU read-side critical section.
+ <p>
+ Some extreme workloads might intentionally delay
+ RCU grace periods, and systems running those workloads can
+ be booted with <tt>rcupdate.rcu_cpu_stall_suppress</tt>
+ to suppress the splats.
+ This kernel parameter may also be set via <tt>sysfs</tt>.
+ Furthermore, RCU CPU stall warnings are counter-productive
+ during sysrq dumps and during panics.
+ RCU therefore supplies the <tt>rcu_sysrq_start()</tt> and
+ <tt>rcu_sysrq_end()</tt> API members to be called before
+ and after long sysrq dumps.
+ RCU also supplies the <tt>rcu_panic()</tt> notifier that is
+ automatically invoked at the beginning of a panic to suppress
+ further RCU CPU stall warnings.
+
+ <p>
+ This requirement made itself known in the early 1990s, pretty
+ much the first time that it was necessary to debug a CPU stall.
+ That said, the initial implementation in DYNIX/ptx was quite
+ generic in comparison with that of Linux.
+<li> Although it would be very good to detect pointers leaking out
+ of RCU read-side critical sections, there is currently no
+ good way of doing this.
+ One complication is the need to distinguish between pointers
+ leaking and pointers that have been handed off from RCU to
+ some other synchronization mechanism, for example, reference
+ counting.
+<li> In kernels built with <tt>CONFIG_RCU_TRACE=y</tt>, RCU-related
+ information is provided via both debugfs and event tracing.
+<li> Open-coded use of <tt>rcu_assign_pointer()</tt> and
+ <tt>rcu_dereference()</tt> to create typical linked
+ data structures can be surprisingly error-prone.
+ Therefore, RCU-protected
+ <a href="https://lwn.net/Articles/609973/#RCU List APIs">linked lists</a>
+ and, more recently, RCU-protected
+ <a href="https://lwn.net/Articles/612100/">hash tables</a>
+ are available.
+ Many other special-purpose RCU-protected data structures are
+ available in the Linux kernel and the userspace RCU library.
+<li> Some linked structures are created at compile time, but still
+ require <tt>__rcu</tt> checking.
+ The <tt>RCU_POINTER_INITIALIZER()</tt> macro serves this
+ purpose.
+<li> It is not necessary to use <tt>rcu_assign_pointer()</tt>
+ when creating linked structures that are to be published via
+ a single external pointer.
+ The <tt>RCU_INIT_POINTER()</tt> macro is provided for
+ this task and also for assigning <tt>NULL</tt> pointers
+ at runtime.
+</ol>
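+
+<p>
+The following sketch shows a few of these interfaces in use, reusing
+the <tt>gp</tt> pointer and <tt>gp_lock</tt> from the earlier examples:
+
+<blockquote>
+<pre>
+ 1 struct foo __rcu *gp; /* The __rcu tag enables sparse checking. */
+ 2
+ 3 /* Code shared between readers and updaters: */
+ 4 p = rcu_dereference_check(gp, lockdep_is_held(&amp;gp_lock));
+ 5
+ 6 /* Update-side code holding gp_lock: */
+ 7 p = rcu_dereference_protected(gp, lockdep_is_held(&amp;gp_lock));
+ 8
+ 9 /* Assigning NULL, where no ordering is required: */
+10 RCU_INIT_POINTER(gp, NULL);
+</pre>
+</blockquote>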
+
+<p>
+This is not a hard-and-fast list: RCU's diagnostic capabilities will
+continue to be guided by the number and type of usage bugs found
+in real-world RCU usage.
+
+<h2><a name="Linux Kernel Complications">Linux Kernel Complications</a></h2>
+
+<p>
+The Linux kernel provides an interesting environment for all kinds of
+software, including RCU.
+Some of the relevant points of interest are as follows:
+
+<ol>
+<li> <a href="#Configuration">Configuration</a>.
+<li> <a href="#Firmware Interface">Firmware Interface</a>.
+<li> <a href="#Early Boot">Early Boot</a>.
+<li> <a href="#Interrupts and NMIs">
+ Interrupts and non-maskable interrupts (NMIs)</a>.
+<li> <a href="#Loadable Modules">Loadable Modules</a>.
+<li> <a href="#Hotplug CPU">Hotplug CPU</a>.
+<li> <a href="#Scheduler and RCU">Scheduler and RCU</a>.
+<li> <a href="#Tracing and RCU">Tracing and RCU</a>.
+<li> <a href="#Energy Efficiency">Energy Efficiency</a>.
+<li> <a href="#Memory Efficiency">Memory Efficiency</a>.
+<li> <a href="#Performance, Scalability, Response Time, and Reliability">
+ Performance, Scalability, Response Time, and Reliability</a>.
+</ol>
+
+<p>
+This list is probably incomplete, but it does give a feel for the
+most notable Linux-kernel complications.
+Each of the following sections covers one of the above topics.
+
+<h3><a name="Configuration">Configuration</a></h3>
+
+<p>
+RCU's goal is automatic configuration, so that almost nobody
+needs to worry about RCU's <tt>Kconfig</tt> options.
+And for almost all users, RCU does in fact work well
+&ldquo;out of the box.&rdquo;
+
+<p>
+However, there are specialized use cases that are handled by
+kernel boot parameters and <tt>Kconfig</tt> options.
+Unfortunately, the <tt>Kconfig</tt> system will explicitly ask users
+about new <tt>Kconfig</tt> options, which requires that almost all of
+them be hidden behind a <tt>CONFIG_RCU_EXPERT</tt> <tt>Kconfig</tt> option.
+
+<p>
+This all should be quite obvious, but the fact remains that
+Linus Torvalds recently had to
+<a href="https://lkml.kernel.org/g/CA+55aFy4wcCwaL4okTs8wXhGZ5h-ibecy_Meg9C4MNQrUnwMcg@mail.gmail.com">remind</a>
+me of this requirement.
+
+<h3><a name="Firmware Interface">Firmware Interface</a></h3>
+
+<p>
+In many cases, the kernel obtains information about the system from the
+firmware, and sometimes things are lost in translation.
+Or the translation is accurate, but the original message is bogus.
+
+<p>
+For example, some systems' firmware overreports the number of CPUs,
+sometimes by a large factor.
+If RCU naively believed the firmware, as it used to do,
+it would create too many per-CPU kthreads.
+Although the resulting system will still run correctly, the extra
+kthreads needlessly consume memory and can cause confusion
+when they show up in <tt>ps</tt> listings.
+
+<p>
+RCU must therefore wait for a given CPU to actually come online before
+it can allow itself to believe that the CPU actually exists.
+The resulting &ldquo;ghost CPUs&rdquo; (which are never going to
+come online) cause a number of
+<a href="https://paulmck.livejournal.com/37494.html">interesting complications</a>.
+
+<h3><a name="Early Boot">Early Boot</a></h3>
+
+<p>
+The Linux kernel's boot sequence is an interesting process,
+and RCU is used early, even before <tt>rcu_init()</tt>
+is invoked.
+In fact, a number of RCU's primitives can be used as soon as the
+initial task's <tt>task_struct</tt> is available and the
+boot CPU's per-CPU variables are set up.
+The read-side primitives (<tt>rcu_read_lock()</tt>,
+<tt>rcu_read_unlock()</tt>, <tt>rcu_dereference()</tt>,
+and <tt>rcu_access_pointer()</tt>) will operate normally very early on,
+as will <tt>rcu_assign_pointer()</tt>.
+
+<p>
+Although <tt>call_rcu()</tt> may be invoked at any
+time during boot, callbacks are not guaranteed to be invoked until after
+the scheduler is fully up and running.
+This delay in callback invocation is due to the fact that RCU does not
+invoke callbacks until it is fully initialized, and this full initialization
+cannot occur until after the scheduler has initialized itself to the
+point where RCU can spawn and run its kthreads.
+In theory, it would be possible to invoke callbacks earlier;
+however, this is not a panacea because there would be severe restrictions
+on what operations those callbacks could invoke.
+
+<p>
+Perhaps surprisingly, <tt>synchronize_rcu()</tt>,
+<a href="#Bottom-Half Flavor"><tt>synchronize_rcu_bh()</tt></a>
+(<a href="#Bottom-Half Flavor">discussed below</a>),
+and
+<a href="#Sched Flavor"><tt>synchronize_sched()</tt></a>
+will all operate normally
+during very early boot, the reason being that there is only one CPU
+and preemption is disabled.
+This means that a call to <tt>synchronize_rcu()</tt> (or friends)
+is itself a quiescent
+state and thus a grace period, so the early-boot implementation can
+be a no-op.
+
+<p>
+Both <tt>synchronize_rcu_bh()</tt> and <tt>synchronize_sched()</tt>
+continue to operate normally through the remainder of boot, courtesy
+of the fact that preemption is disabled across their RCU read-side
+critical sections and also courtesy of the fact that there is still
+only one CPU.
+However, once the scheduler starts initializing, preemption is enabled.
+There is still only a single CPU, but the fact that preemption is enabled
+means that the no-op implementation of <tt>synchronize_rcu()</tt> no
+longer works in <tt>CONFIG_PREEMPT=y</tt> kernels.
+Therefore, as soon as the scheduler starts initializing, the early-boot
+fastpath is disabled.
+This means that <tt>synchronize_rcu()</tt> switches to its runtime
+mode of operation where it posts callbacks, which in turn means that
+any call to <tt>synchronize_rcu()</tt> will block until the corresponding
+callback is invoked.
+Unfortunately, the callback cannot be invoked until RCU's runtime
+grace-period machinery is up and running, which cannot happen until
+the scheduler has initialized itself sufficiently to allow RCU's
+kthreads to be spawned.
+Therefore, invoking <tt>synchronize_rcu()</tt> during scheduler
+initialization can result in deadlock.
+
+<p><a name="Quick Quiz 14"><b>Quick Quiz 14</b>:</a>
+So what happens with <tt>synchronize_rcu()</tt> during
+scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
+kernels?
+<br><a href="#qq14answer">Answer</a>
+
+<p>
+I learned of these boot-time requirements as a result of a series of
+system hangs.
+
+<h3><a name="Interrupts and NMIs">Interrupts and NMIs</a></h3>
+
+<p>
+The Linux kernel has interrupts, and RCU read-side critical sections are
+legal within interrupt handlers and within interrupt-disabled regions
+of code, as are invocations of <tt>call_rcu()</tt>.
+
+<p>
+Some Linux-kernel architectures can enter an interrupt handler from
+non-idle process context, and then just never leave it, instead stealthily
+transitioning back to process context.
+This trick is sometimes used to invoke system calls from inside the kernel.
+These &ldquo;half-interrupts&rdquo; mean that RCU has to be very careful
+about how it counts interrupt nesting levels.
+I learned of this requirement the hard way during a rewrite
+of RCU's dyntick-idle code.
+
+<p>
+The Linux kernel has non-maskable interrupts (NMIs), and
+RCU read-side critical sections are legal within NMI handlers.
+Thankfully, RCU update-side primitives, including
+<tt>call_rcu()</tt>, are prohibited within NMI handlers.
+
+<p>
+The name notwithstanding, some Linux-kernel architectures
+can have nested NMIs, which RCU must handle correctly.
+Andy Lutomirski
+<a href="https://lkml.kernel.org/g/CALCETrXLq1y7e_dKFPgou-FKHB6Pu-r8+t-6Ds+8=va7anBWDA@mail.gmail.com">surprised me</a>
+with this requirement;
+he also kindly surprised me with
+<a href="https://lkml.kernel.org/g/CALCETrXSY9JpW3uE6H8WYk81sg56qasA2aqmjMPsq5dOtzso=g@mail.gmail.com">an algorithm</a>
+that meets this requirement.
+
+<h3><a name="Loadable Modules">Loadable Modules</a></h3>
+
+<p>
+The Linux kernel has loadable modules, and these modules can
+also be unloaded.
+After a given module has been unloaded, any attempt to call
+one of its functions results in a segmentation fault.
+The module-unload functions must therefore cancel any
+delayed calls to loadable-module functions, for example,
+any outstanding <tt>mod_timer()</tt> must be dealt with
+via <tt>del_timer_sync()</tt> or similar.
+
+<p>
+Unfortunately, there is no way to cancel an RCU callback;
+once you invoke <tt>call_rcu()</tt>, the callback function will
+eventually be invoked, unless the system goes down first.
+Because it is normally considered socially irresponsible to crash the system
+in response to a module unload request, we need some other way
+to deal with in-flight RCU callbacks.
+
+<p>
+RCU therefore provides
+<tt><a href="https://lwn.net/Articles/217484/">rcu_barrier()</a></tt>,
+which waits until all in-flight RCU callbacks have been invoked.
+If a module uses <tt>call_rcu()</tt>, its exit function should therefore
+prevent any future invocation of <tt>call_rcu()</tt>, then invoke
+<tt>rcu_barrier()</tt>.
+In theory, the underlying module-unload code could invoke
+<tt>rcu_barrier()</tt> unconditionally, but in practice this would
+incur unacceptable latencies.
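+
+<p>
+A sketch of a module-exit function following this rule appears below,
+with <tt>unregister_my_hooks()</tt> standing in for whatever mechanism
+prevents further invocations of <tt>call_rcu()</tt>:
+
+<blockquote>
+<pre>
+ 1 static void __exit mymod_exit(void)
+ 2 {
+ 3   unregister_my_hooks(); /* No new call_rcu() invocations... */
+ 4   rcu_barrier(); /* ...then wait for in-flight callbacks. */
+ 5 }
+ 6 module_exit(mymod_exit);
+</pre>
+</blockquote>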
+
+<p>
+Nikita Danilov noted this requirement for an analogous filesystem-unmount
+situation, and Dipankar Sarma incorporated <tt>rcu_barrier()</tt> into RCU.
+The need for <tt>rcu_barrier()</tt> for module unloading became
+apparent later.
+
+<h3><a name="Hotplug CPU">Hotplug CPU</a></h3>
+
+<p>
+The Linux kernel supports CPU hotplug, which means that CPUs
+can come and go.
+It is of course illegal to use any RCU API member from an offline CPU.
+This requirement was present from day one in DYNIX/ptx, but
+on the other hand, the Linux kernel's CPU-hotplug implementation
+is &ldquo;interesting.&rdquo;
+
+<p>
+The Linux-kernel CPU-hotplug implementation has notifiers that
+are used to allow the various kernel subsystems (including RCU)
+to respond appropriately to a given CPU-hotplug operation.
+Most RCU operations may be invoked from CPU-hotplug notifiers,
+including even normal synchronous grace-period operations
+such as <tt>synchronize_rcu()</tt>.
+However, expedited grace-period operations such as
+<tt>synchronize_rcu_expedited()</tt> are not supported,
+due to the fact that current implementations block CPU-hotplug
+operations, which could result in deadlock.
+
+<p>
+In addition, all-callback-wait operations such as
+<tt>rcu_barrier()</tt> are also not supported, due to the
+fact that there are phases of CPU-hotplug operations where
+the outgoing CPU's callbacks will not be invoked until after
+the CPU-hotplug operation ends, which could also result in deadlock.
+
+<h3><a name="Scheduler and RCU">Scheduler and RCU</a></h3>
+
+<p>
+RCU depends on the scheduler, and the scheduler uses RCU to
+protect some of its data structures.
+This means the scheduler is forbidden from acquiring
+the runqueue locks and the priority-inheritance locks
+in the middle of an outermost RCU read-side critical section unless either
+(1)&nbsp;it releases them before exiting that same
+RCU read-side critical section, or
+(2)&nbsp;interrupts are disabled across
+that entire RCU read-side critical section.
+This same prohibition also applies (recursively!) to any lock that is acquired
+while holding any lock to which this prohibition applies.
+Adhering to this rule prevents preemptible RCU from invoking
+<tt>rcu_read_unlock_special()</tt> while either runqueue or
+priority-inheritance locks are held, thus avoiding deadlock.
+
+<p>
+Prior to v4.4, it was only necessary to disable preemption across
+RCU read-side critical sections that acquired scheduler locks.
+In v4.4, expedited grace periods started using IPIs, and these
+IPIs could force a <tt>rcu_read_unlock()</tt> to take the slowpath.
+Therefore, this expedited-grace-period change required disabling of
+interrupts, not just preemption.
+
+<p>
+For RCU's part, the preemptible-RCU <tt>rcu_read_unlock()</tt>
+implementation must be written carefully to avoid similar deadlocks.
+In particular, <tt>rcu_read_unlock()</tt> must tolerate an
+interrupt where the interrupt handler invokes both
+<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
+This possibility requires <tt>rcu_read_unlock()</tt> to use
+negative nesting levels to avoid destructive recursion via
+the interrupt handler's use of RCU.
+
+<p>
+This pair of mutual scheduler-RCU requirements came as a
+<a href="https://lwn.net/Articles/453002/">complete surprise</a>.
+
+<p>
+As noted above, RCU makes use of kthreads, and it is necessary to
+avoid excessive CPU-time accumulation by these kthreads.
+This requirement was no surprise, but RCU's violation of it
+when running context-switch-heavy workloads when built with
+<tt>CONFIG_NO_HZ_FULL=y</tt>
+<a href="http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf">did come as a surprise [PDF]</a>.
+RCU has made good progress towards meeting this requirement, even
+for context-switch-heavy <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
+but there is room for further improvement.
+
+<h3><a name="Tracing and RCU">Tracing and RCU</a></h3>
+
+<p>
+It is possible to use tracing on RCU code, but tracing itself
+uses RCU.
+For this reason, <tt>rcu_dereference_raw_notrace()</tt>
+is provided for use by tracing, which avoids the destructive
+recursion that could otherwise ensue.
+This API is also used by virtualization in some architectures,
+where RCU readers execute in environments in which tracing
+cannot be used.
+The tracing folks both located the requirement and provided the
+needed fix, so this surprise requirement was relatively painless.
+
+<h3><a name="Energy Efficiency">Energy Efficiency</a></h3>
+
+<p>
+Interrupting idle CPUs is considered socially unacceptable,
+especially by people with battery-powered embedded systems.
+RCU therefore conserves energy by detecting which CPUs are
+idle, including tracking CPUs that have been interrupted from idle.
+This is a large part of the energy-efficiency requirement,
+so I learned of this via an irate phone call.
+
+<p>
+Because RCU avoids interrupting idle CPUs, it is illegal to
+execute an RCU read-side critical section on an idle CPU.
+(Kernels built with <tt>CONFIG_PROVE_RCU=y</tt> will splat
+if you try it.)
+The <tt>RCU_NONIDLE()</tt> macro and <tt>_rcuidle</tt>
+event tracing are provided to work around this restriction.
+In addition, <tt>rcu_is_watching()</tt> may be used to
+test whether or not it is currently legal to run RCU read-side
+critical sections on this CPU.
+I learned of the need for diagnostics on the one hand
+and <tt>RCU_NONIDLE()</tt> on the other while inspecting
+idle-loop code.
+Steven Rostedt supplied <tt>_rcuidle</tt> event tracing,
+which is used quite heavily in the idle loop.
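+
+<p>
+For example, a sketch of the <tt>RCU_NONIDLE()</tt> idiom (with
+<tt>do_idle_diagnostics()</tt> as a hypothetical RCU-using callee)
+is as follows:
+
+<blockquote>
+<pre>
+ 1 /* In the idle loop, where rcu_is_watching() returns false and */
+ 2 /* plain RCU readers are therefore illegal: */
+ 3 RCU_NONIDLE(do_idle_diagnostics());
+</pre>
+</blockquote>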
+
+<p>
+It is similarly socially unacceptable to interrupt an
+<tt>nohz_full</tt> CPU running in userspace.
+RCU must therefore track <tt>nohz_full</tt> userspace
+execution.
+And in
+<a href="https://lwn.net/Articles/558284/"><tt>CONFIG_NO_HZ_FULL_SYSIDLE=y</tt></a>
+kernels, RCU must separately track idle CPUs on the one hand and
+CPUs that are either idle or executing in userspace on the other.
+In both cases, RCU must be able to sample state at two points in
+time, and be able to determine whether or not some other CPU spent
+any time idle and/or executing in userspace.
+
+<p>
+These energy-efficiency requirements have proven quite difficult to
+understand and to meet. For example, there have been more than five
+clean-sheet rewrites of RCU's energy-efficiency code, the last of
+which was finally able to demonstrate
+<a href="http://www.rdrop.com/users/paulmck/realtime/paper/AMPenergy.2013.04.19a.pdf">real energy savings running on real hardware [PDF]</a>.
+As noted earlier,
+I learned of many of these requirements via angry phone calls:
+Flaming me on the Linux-kernel mailing list was apparently not
+sufficient to fully vent their ire at RCU's energy-efficiency bugs!
+
+<h3><a name="Memory Efficiency">Memory Efficiency</a></h3>
+
+<p>
+Although small-memory non-realtime systems can simply use Tiny RCU,
+code size is only one aspect of memory efficiency.
+Another aspect is the size of the <tt>rcu_head</tt> structure
+used by <tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>.
+Although this structure contains nothing more than a pair of pointers,
+it does appear in many RCU-protected data structures, including
+some that are size critical.
+The <tt>page</tt> structure is a case in point, as evidenced by
+the many occurrences of the <tt>union</tt> keyword within that structure.
+
+<p>
+This need for memory efficiency is one reason that RCU uses hand-crafted
+singly linked lists to track the <tt>rcu_head</tt> structures that
+are waiting for a grace period to elapse.
+It is also the reason why <tt>rcu_head</tt> structures do not contain
+debug information, such as fields tracking the file and line of the
+<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> that posted them.
+Although this information might appear in debug-only kernel builds at some
+point, in the meantime, the <tt>-&gt;func</tt> field will often provide
+the needed debug information.
+
+<p>
+However, in some cases, the need for memory efficiency leads to even
+more extreme measures.
+Returning to the <tt>page</tt> structure, the <tt>rcu_head</tt> field
+shares storage with a great many other structures that are used at
+various points in the corresponding page's lifetime.
+In order to correctly resolve certain
+<a href="https://lkml.kernel.org/g/1439976106-137226-1-git-send-email-kirill.shutemov@linux.intel.com">race conditions</a>,
+the Linux kernel's memory-management subsystem needs a particular bit
+to remain zero during all phases of grace-period processing,
+and that bit happens to map to the bottom bit of the
+<tt>rcu_head</tt> structure's <tt>-&gt;next</tt> field.
+RCU makes this guarantee as long as <tt>call_rcu()</tt>
+is used to post the callback, as opposed to <tt>kfree_rcu()</tt>
+or some future &ldquo;lazy&rdquo;
+variant of <tt>call_rcu()</tt> that might one day be created for
+energy-efficiency purposes.
+
+<h3><a name="Performance, Scalability, Response Time, and Reliability">
+Performance, Scalability, Response Time, and Reliability</a></h3>
+
+<p>
+Expanding on the
+<a href="#Performance and Scalability">earlier discussion</a>,
+RCU is used heavily by hot code paths in performance-critical
+portions of the Linux kernel's networking, security, virtualization,
+and scheduling code paths.
+RCU must therefore use efficient implementations, especially in its
+read-side primitives.
+To that end, it would be good if preemptible RCU's implementation
+of <tt>rcu_read_lock()</tt> could be inlined; however, doing
+this requires resolving <tt>#include</tt> issues with the
+<tt>task_struct</tt> structure.
+
+<p>
+The Linux kernel supports hardware configurations with up to
+4096 CPUs, which means that RCU must be extremely scalable.
+Algorithms that involve frequent acquisitions of global locks or
+frequent atomic operations on global variables simply cannot be
+tolerated within the RCU implementation.
+RCU therefore makes heavy use of a combining tree based on the
+<tt>rcu_node</tt> structure.
+RCU is required to tolerate all CPUs continuously invoking any
+combination of RCU's runtime primitives with minimal per-operation
+overhead.
+In fact, in many cases, increasing load must <i>decrease</i> the
+per-operation overhead, witness the batching optimizations for
+<tt>synchronize_rcu()</tt>, <tt>call_rcu()</tt>,
+<tt>synchronize_rcu_expedited()</tt>, and <tt>rcu_barrier()</tt>.
+As a general rule, RCU must cheerfully accept whatever the
+rest of the Linux kernel decides to throw at it.
+
+<p>
+The Linux kernel is used for real-time workloads, especially
+in conjunction with the
+<a href="https://rt.wiki.kernel.org/index.php/Main_Page">-rt patchset</a>.
+The real-time-latency response requirements are such that the
+traditional approach of disabling preemption across RCU
+read-side critical sections is inappropriate.
+Kernels built with <tt>CONFIG_PREEMPT=y</tt> therefore
+use an RCU implementation that allows RCU read-side critical
+sections to be preempted.
+This requirement made its presence known after users made it
+clear that an earlier
+<a href="https://lwn.net/Articles/107930/">real-time patch</a>
+did not meet their needs, in conjunction with some
+<a href="https://lkml.kernel.org/g/20050318002026.GA2693@us.ibm.com">RCU issues</a>
+encountered by a very early version of the -rt patchset.
+
+<p>
+In addition, RCU must make do with a sub-100-microsecond real-time latency
+budget.
+In fact, on smaller systems with the -rt patchset, the Linux kernel
+provides sub-20-microsecond real-time latencies for the whole kernel,
+including RCU.
+RCU's scalability and latency must therefore be sufficient for
+these sorts of configurations.
+To my surprise, the sub-100-microsecond real-time latency budget
+<a href="http://www.rdrop.com/users/paulmck/realtime/paper/bigrt.2013.01.31a.LCA.pdf">
+applies to even the largest systems [PDF]</a>,
+up to and including systems with 4096 CPUs.
+This real-time requirement motivated the grace-period kthread, which
+also simplified handling of a number of race conditions.
+
+<p>
+Finally, RCU's status as a synchronization primitive means that
+any RCU failure can result in arbitrary memory corruption that can be
+extremely difficult to debug.
+This means that RCU must be extremely reliable, which in
+practice also means that RCU must have an aggressive stress-test
+suite.
+This stress-test suite is called <tt>rcutorture</tt>.
+
+<p>
+Although the need for <tt>rcutorture</tt> was no surprise,
+the current immense popularity of the Linux kernel is posing
+interesting&mdash;and perhaps unprecedented&mdash;validation
+challenges.
+To see this, keep in mind that there are well over one billion
+instances of the Linux kernel running today, given Android
+smartphones, Linux-powered televisions, and servers.
+This number can be expected to increase sharply with the advent of
+the celebrated Internet of Things.
+
+<p>
+Suppose that RCU contains a race condition that manifests on average
+once per million years of runtime.
+This bug will be occurring about three times per <i>day</i> across
+the installed base.
+RCU could simply hide behind hardware error rates, given that no one
+should really expect their smartphone to last for a million years.
+However, anyone taking too much comfort from this thought should
+consider the fact that in most jurisdictions, a successful multi-year
+test of a given mechanism, which might include a Linux kernel,
+suffices for a number of types of safety-critical certifications.
+In fact, rumor has it that the Linux kernel is already being used
+in production for safety-critical applications.
+I don't know about you, but I would feel quite bad if a bug in RCU
+killed someone.
+Which might explain my recent focus on validation and verification.
+
+<h2><a name="Other RCU Flavors">Other RCU Flavors</a></h2>
+
+<p>
+One of the more surprising things about RCU is that there are now
+no fewer than five <i>flavors</i>, or API families.
+In addition, the primary flavor that has been the sole focus up to
+this point has two different implementations, non-preemptible and
+preemptible.
+The other four flavors are listed below, with requirements for each
+described in a separate section.
+
+<ol>
+<li> <a href="#Bottom-Half Flavor">Bottom-Half Flavor</a>
+<li> <a href="#Sched Flavor">Sched Flavor</a>
+<li> <a href="#Sleepable RCU">Sleepable RCU</a>
+<li> <a href="#Tasks RCU">Tasks RCU</a>
+</ol>
+
+<h3><a name="Bottom-Half Flavor">Bottom-Half Flavor</a></h3>
+
+<p>
+The softirq-disable (AKA &ldquo;bottom-half&rdquo;,
+hence the &ldquo;_bh&rdquo; abbreviations)
+flavor of RCU, or <i>RCU-bh</i>, was developed by
+Dipankar Sarma to provide a flavor of RCU that could withstand the
+network-based denial-of-service attacks researched by Robert
+Olsson.
+These attacks placed so much networking load on the system
+that some of the CPUs never exited softirq execution,
+which in turn prevented those CPUs from ever executing a context switch,
+which, in the RCU implementation of that time, prevented grace periods
+from ever ending.
+The result was an out-of-memory condition and a system hang.
+
+<p>
+The solution was the creation of RCU-bh, which does
+<tt>local_bh_disable()</tt>
+across its read-side critical sections, and which uses the transition
+from one type of softirq processing to another as a quiescent state
+in addition to context switch, idle, user mode, and offline.
+This means that RCU-bh grace periods can complete even when some of
+the CPUs execute in softirq indefinitely, thus allowing algorithms
+based on RCU-bh to withstand network-based denial-of-service attacks.
+
+<p>
+Because
+<tt>rcu_read_lock_bh()</tt> and <tt>rcu_read_unlock_bh()</tt>
+disable and re-enable softirq handlers, any attempt to start a softirq
+handler during the
+RCU-bh read-side critical section will be deferred.
+In this case, <tt>rcu_read_unlock_bh()</tt>
+will invoke softirq processing, which can take considerable time.
+One can of course argue that this softirq overhead should be associated
+with the code following the RCU-bh read-side critical section rather
+than <tt>rcu_read_unlock_bh()</tt>, but the fact
+is that most profiling tools cannot be expected to make this sort
+of fine distinction.
+For example, suppose that a three-millisecond-long RCU-bh read-side
+critical section executes during a time of heavy networking load.
+There will very likely be an attempt to invoke at least one softirq
+handler during that three milliseconds, but any such invocation will
+be delayed until the time of the <tt>rcu_read_unlock_bh()</tt>.
+This can of course make it appear at first glance as if
+<tt>rcu_read_unlock_bh()</tt> was executing very slowly.
+
+<p>
+The
+<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-bh API</a>
+includes
+<tt>rcu_read_lock_bh()</tt>,
+<tt>rcu_read_unlock_bh()</tt>,
+<tt>rcu_dereference_bh()</tt>,
+<tt>rcu_dereference_bh_check()</tt>,
+<tt>synchronize_rcu_bh()</tt>,
+<tt>synchronize_rcu_bh_expedited()</tt>,
+<tt>call_rcu_bh()</tt>,
+<tt>rcu_barrier_bh()</tt>, and
+<tt>rcu_read_lock_bh_held()</tt>.
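+
+<p>
+As a minimal sketch (with a hypothetical <tt>gp</tt> pointer, a
+hypothetical <tt>handle_packet()</tt> function, and update-side
+locking assumed), an RCU-bh reader and updater might look as follows:
+
+<blockquote>
+<pre>
+ 1 /* Reader, for example in packet processing. */
+ 2 rcu_read_lock_bh();
+ 3 p = rcu_dereference_bh(gp);
+ 4 if (p)
+ 5   handle_packet(p);
+ 6 rcu_read_unlock_bh();
+ 7
+ 8 /* Updater, with the update-side lock held. */
+ 9 old = rcu_access_pointer(gp);
+10 rcu_assign_pointer(gp, newp);
+11 synchronize_rcu_bh(); /* Completes even if CPUs stay in softirq. */
+12 kfree(old);
+</pre>
+</blockquote>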
+
+<h3><a name="Sched Flavor">Sched Flavor</a></h3>
+
+<p>
+Before preemptible RCU, waiting for an RCU grace period had the
+side effect of also waiting for all pre-existing interrupt
+and NMI handlers.
+However, there are legitimate preemptible-RCU implementations that
+do not have this property, given that any point in the code outside
+of an RCU read-side critical section can be a quiescent state.
+Therefore, <i>RCU-sched</i> was created, which follows &ldquo;classic&rdquo;
+RCU in that an RCU-sched grace period waits for pre-existing
+interrupt and NMI handlers.
+In kernels built with <tt>CONFIG_PREEMPT=n</tt>, the RCU and RCU-sched
+APIs have identical implementations, while kernels built with
+<tt>CONFIG_PREEMPT=y</tt> provide a separate implementation for each.
+
+<p>
+Note well that in <tt>CONFIG_PREEMPT=y</tt> kernels,
+<tt>rcu_read_lock_sched()</tt> and <tt>rcu_read_unlock_sched()</tt>
+disable and re-enable preemption, respectively.
+This means that if there was a preemption attempt during the
+RCU-sched read-side critical section, <tt>rcu_read_unlock_sched()</tt>
+will enter the scheduler, with all the latency and overhead entailed.
+Just as with <tt>rcu_read_unlock_bh()</tt>, this can make it look
+as if <tt>rcu_read_unlock_sched()</tt> was executing very slowly.
+However, the highest-priority task won't be preempted, so that task
+will enjoy low-overhead <tt>rcu_read_unlock_sched()</tt> invocations.
+
+<p>
+The
+<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-sched API</a>
+includes
+<tt>rcu_read_lock_sched()</tt>,
+<tt>rcu_read_unlock_sched()</tt>,
+<tt>rcu_read_lock_sched_notrace()</tt>,
+<tt>rcu_read_unlock_sched_notrace()</tt>,
+<tt>rcu_dereference_sched()</tt>,
+<tt>rcu_dereference_sched_check()</tt>,
+<tt>synchronize_sched()</tt>,
+<tt>synchronize_rcu_sched_expedited()</tt>,
+<tt>call_rcu_sched()</tt>,
+<tt>rcu_barrier_sched()</tt>, and
+<tt>rcu_read_lock_sched_held()</tt>.
+However, anything that disables preemption also marks an RCU-sched
+read-side critical section, including
+<tt>preempt_disable()</tt> and <tt>preempt_enable()</tt>,
+<tt>local_irq_save()</tt> and <tt>local_irq_restore()</tt>,
+and so on.
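+
+<p>
+As a rough sketch (again assuming a hypothetical <tt>gp</tt> pointer,
+a hypothetical <tt>do_something()</tt> function, and update-side
+locking), an explicit RCU-sched reader and the corresponding
+update-side wait might look as follows:
+
+<blockquote>
+<pre>
+ 1 /* Reader: preemption is disabled throughout. */
+ 2 rcu_read_lock_sched();
+ 3 p = rcu_dereference_sched(gp);
+ 4 if (p)
+ 5   do_something(p-&gt;a, p-&gt;b);
+ 6 rcu_read_unlock_sched();
+ 7
+ 8 /* Updater, with the update-side lock held. */
+ 9 old = rcu_access_pointer(gp);
+10 rcu_assign_pointer(gp, NULL);
+11 synchronize_sched(); /* Waits for preemption-disabled regions. */
+12 kfree(old);
+</pre>
+</blockquote>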
+
+<h3><a name="Sleepable RCU">Sleepable RCU</a></h3>
+
+<p>
+For well over a decade, someone saying &ldquo;I need to block within
+an RCU read-side critical section&rdquo; was a reliable indication
+that this someone did not understand RCU.
+After all, if you are always blocking in an RCU read-side critical
+section, you can probably afford to use a higher-overhead synchronization
+mechanism.
+However, that changed with the advent of the Linux kernel's notifiers,
+whose RCU read-side critical
+sections almost never sleep, but sometimes need to.
+This resulted in the introduction of
+<a href="https://lwn.net/Articles/202847/">sleepable RCU</a>,
+or <i>SRCU</i>.
+
+<p>
+SRCU allows different domains to be defined, with each such domain
+defined by an instance of an <tt>srcu_struct</tt> structure.
+A pointer to this structure must be passed in to each SRCU function,
+for example, <tt>synchronize_srcu(&amp;ss)</tt>, where
+<tt>ss</tt> is the <tt>srcu_struct</tt> structure.
+The key benefit of these domains is that a slow SRCU reader in one
+domain does not delay an SRCU grace period in some other domain.
+That said, one consequence of these domains is that read-side code
+must pass a &ldquo;cookie&rdquo; from <tt>srcu_read_lock()</tt>
+to <tt>srcu_read_unlock()</tt>, for example, as follows:
+
+<blockquote>
+<pre>
+ 1 int idx;
+ 2
+ 3 idx = srcu_read_lock(&amp;ss);
+ 4 do_something();
+ 5 srcu_read_unlock(&amp;ss, idx);
+</pre>
+</blockquote>
+
+<p>
+As noted above, it is legal to block within SRCU read-side critical sections;
+however, with great power comes great responsibility.
+If you block forever in one of a given domain's SRCU read-side critical
+sections, then that domain's grace periods will also be blocked forever.
+Of course, one good way to block forever is to deadlock, which can
+happen if any operation in a given domain's SRCU read-side critical
+section can block waiting, either directly or indirectly, for that domain's
+grace period to elapse.
+For example, this results in a self-deadlock:
+
+<blockquote>
+<pre>
+ 1 int idx;
+ 2
+ 3 idx = srcu_read_lock(&amp;ss);
+ 4 do_something();
+ 5 synchronize_srcu(&amp;ss);
+ 6 srcu_read_unlock(&amp;ss, idx);
+</pre>
+</blockquote>
+
+<p>
+However, if line&nbsp;5 acquired a mutex that was held across
+a <tt>synchronize_srcu()</tt> for domain <tt>ss</tt>,
+deadlock would still be possible.
+Furthermore, if line&nbsp;5 acquired a mutex that was held across
+a <tt>synchronize_srcu()</tt> for some other domain <tt>ss1</tt>,
+and if an <tt>ss1</tt>-domain SRCU read-side critical section
+acquired another mutex that was held across an <tt>ss</tt>-domain
+<tt>synchronize_srcu()</tt>,
+deadlock would again be possible.
+Such a deadlock cycle could extend across an arbitrarily large number
+of different SRCU domains.
+Again, with great power comes great responsibility.
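+
+<p>
+For concreteness, here is a sketch of such a two-domain deadlock
+cycle, using hypothetical mutexes <tt>m1</tt> and <tt>m2</tt>.
+Each thread can end up waiting, directly or indirectly, on all of
+the others:
+
+<blockquote>
+<pre>
+ 1 /* Thread 1: ss-domain reader blocks on m1. */
+ 2 i1 = srcu_read_lock(&amp;ss);
+ 3 mutex_lock(&amp;m1);
+ 4 mutex_unlock(&amp;m1);
+ 5 srcu_read_unlock(&amp;ss, i1);
+ 6
+ 7 /* Thread 2: holds m1 across an ss1-domain grace period. */
+ 8 mutex_lock(&amp;m1);
+ 9 synchronize_srcu(&amp;ss1);
+10 mutex_unlock(&amp;m1);
+11
+12 /* Thread 3: ss1-domain reader blocks on m2. */
+13 i3 = srcu_read_lock(&amp;ss1);
+14 mutex_lock(&amp;m2);
+15 mutex_unlock(&amp;m2);
+16 srcu_read_unlock(&amp;ss1, i3);
+17
+18 /* Thread 4: holds m2 across an ss-domain grace period. */
+19 mutex_lock(&amp;m2);
+20 synchronize_srcu(&amp;ss);
+21 mutex_unlock(&amp;m2);
+</pre>
+</blockquote>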
+
+<p>
+Unlike the other RCU flavors, SRCU read-side critical sections can
+run on idle and even offline CPUs.
+This ability requires that <tt>srcu_read_lock()</tt> and
+<tt>srcu_read_unlock()</tt> contain memory barriers, which means
+that SRCU readers will run a bit slower than would RCU readers.
+It also motivates the <tt>smp_mb__after_srcu_read_unlock()</tt>
+API, which, in combination with <tt>srcu_read_unlock()</tt>,
+guarantees a full memory barrier.
+
+<p>
+The
+<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">SRCU API</a>
+includes
+<tt>srcu_read_lock()</tt>,
+<tt>srcu_read_unlock()</tt>,
+<tt>srcu_dereference()</tt>,
+<tt>srcu_dereference_check()</tt>,
+<tt>synchronize_srcu()</tt>,
+<tt>synchronize_srcu_expedited()</tt>,
+<tt>call_srcu()</tt>,
+<tt>srcu_barrier()</tt>, and
+<tt>srcu_read_lock_held()</tt>.
+It also includes
+<tt>DEFINE_SRCU()</tt>,
+<tt>DEFINE_STATIC_SRCU()</tt>, and
+<tt>init_srcu_struct()</tt>
+APIs for defining and initializing <tt>srcu_struct</tt> structures.
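+
+<p>
+For example, a statically allocated domain might be set up and used
+roughly as follows, where <tt>my_srcu</tt> and the element-handling
+functions are hypothetical:
+
+<blockquote>
+<pre>
+ 1 DEFINE_STATIC_SRCU(my_srcu);
+ 2
+ 3 void my_reader(void)
+ 4 {
+ 5   int idx;
+ 6
+ 7   idx = srcu_read_lock(&amp;my_srcu);
+ 8   do_something_that_may_sleep();
+ 9   srcu_read_unlock(&amp;my_srcu, idx);
+10 }
+11
+12 void my_updater(void)
+13 {
+14   remove_element();
+15   synchronize_srcu(&amp;my_srcu); /* Waits only for my_srcu readers. */
+16   free_element();
+17 }
+</pre>
+</blockquote>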
+
+<h3><a name="Tasks RCU">Tasks RCU</a></h3>
+
+<p>
+Some forms of tracing use &ldquo;trampolines&rdquo; to handle the
+binary rewriting required to install different types of probes.
+It would be good to be able to free old trampolines, which sounds
+like a job for some form of RCU.
+However, because it is necessary to be able to install a trace
+anywhere in the code, it is not possible to use read-side markers
+such as <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
+In addition, it does not work to have these markers in the trampoline
+itself, because there would need to be instructions following
+<tt>rcu_read_unlock()</tt>.
+Although <tt>synchronize_rcu()</tt> would guarantee that execution
+reached the <tt>rcu_read_unlock()</tt>, it would not be able to
+guarantee that execution had completely left the trampoline.
+
+<p>
+The solution, in the form of
+<a href="https://lwn.net/Articles/607117/"><i>Tasks RCU</i></a>,
+is to have implicit
+read-side critical sections that are delimited by voluntary context
+switches, that is, calls to <tt>schedule()</tt>,
+<tt>cond_resched_rcu_qs()</tt>, and
+<tt>synchronize_rcu_tasks()</tt>.
+In addition, transitions to and from userspace execution also delimit
+tasks-RCU read-side critical sections.
+
+<p>
+The tasks-RCU API is quite compact, consisting only of
+<tt>call_rcu_tasks()</tt>,
+<tt>synchronize_rcu_tasks()</tt>, and
+<tt>rcu_barrier_tasks()</tt>.
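+
+<p>
+A tracing subsystem might therefore free an old trampoline roughly as
+follows, where <tt>unpatch_call_sites()</tt> and <tt>old_tramp</tt>
+are hypothetical:
+
+<blockquote>
+<pre>
+ 1 unpatch_call_sites(old_tramp); /* No new entries into trampoline. */
+ 2 synchronize_rcu_tasks(); /* Wait for voluntary context switches. */
+ 3 kfree(old_tramp); /* No task can still be executing in it. */
+</pre>
+</blockquote>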
+
+<h2><a name="Possible Future Changes">Possible Future Changes</a></h2>
+
+<p>
+One of the tricks that RCU uses to attain update-side scalability is
+to increase grace-period latency with increasing numbers of CPUs.
+If this becomes a serious problem, it will be necessary to rework the
+grace-period state machine so as to avoid the need for the additional
+latency.
+
+<p>
+Expedited grace periods scan the CPUs, so their latency and overhead
+increase with increasing numbers of CPUs.
+If this becomes a serious problem on large systems, it will be necessary
+to do some redesign to avoid this scalability problem.
+
+<p>
+RCU disables CPU hotplug in a few places, perhaps most notably in the
+expedited grace-period and <tt>rcu_barrier()</tt> operations.
+If there is a strong reason to use expedited grace periods in CPU-hotplug
+notifiers, it will be necessary to avoid disabling CPU hotplug.
+This would introduce some complexity, so there had better be a <i>very</i>
+good reason.
+
+<p>
+The tradeoff between grace-period latency on the one hand and interruptions
+of other CPUs on the other hand may need to be re-examined.
+The desire is of course for zero grace-period latency as well as zero
+interprocessor interrupts undertaken during an expedited grace period
+operation.
+While this ideal is unlikely to be achievable, it is quite possible that
+further improvements can be made.
+
+<p>
+The multiprocessor implementations of RCU use a combining tree that
+groups CPUs so as to reduce lock contention and increase cache locality.
+However, this combining tree does not spread its memory across NUMA
+nodes nor does it align the CPU groups with hardware features such
+as sockets or cores.
+Such spreading and alignment is currently believed to be unnecessary
+because the hotpath read-side primitives do not access the combining
+tree, nor does <tt>call_rcu()</tt> in the common case.
+If you believe that your architecture needs such spreading and alignment,
+then your architecture should also benefit from the
+<tt>rcutree.rcu_fanout_leaf</tt> boot parameter, which can be set
+to the number of CPUs in a socket, NUMA node, or whatever.
+If the number of CPUs is too large, use a fraction of the number of
+CPUs.
+If the number of CPUs is a large prime number, well, that certainly
+is an &ldquo;interesting&rdquo; architectural choice!
+More flexible arrangements might be considered, but only if
+<tt>rcutree.rcu_fanout_leaf</tt> has proven inadequate, and only
+if the inadequacy has been demonstrated by a carefully run and
+realistic system-level workload.
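+
+<p>
+For example, on a hypothetical system with 16 CPUs per socket, one
+might boot with:
+
+<blockquote>
+<pre>
+rcutree.rcu_fanout_leaf=16
+</pre>
+</blockquote>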
+
+<p>
+Please note that arrangements that require RCU to remap CPU numbers will
+require extremely good demonstration of need and full exploration of
+alternatives.
+
+<p>
+There is an embarrassingly large number of flavors of RCU, and this
+number has been increasing over time.
+Perhaps it will be possible to combine some at some future date.
+
+<p>
+RCU's various kthreads are reasonably recent additions.
+It is quite likely that adjustments will be required to more gracefully
+handle extreme loads.
+It might also be necessary to be able to relate CPU utilization by
+RCU's kthreads and softirq handlers to the code that instigated this
+CPU utilization.
+For example, RCU callback overhead might be charged back to the
+originating <tt>call_rcu()</tt> instance, though probably not
+in production kernels.
+
+<h2><a name="Summary">Summary</a></h2>
+
+<p>
+This document has presented more than two decades' worth of RCU
+requirements.
+Given that the requirements keep changing, this will not be the last
+word on this subject, but at least it serves to get an important
+subset of the requirements set forth.
+
+<h2><a name="Acknowledgments">Acknowledgments</a></h2>
+
+I am grateful to Steven Rostedt, Lai Jiangshan, Ingo Molnar,
+Oleg Nesterov, Borislav Petkov, Peter Zijlstra, Boqun Feng, and
+Andy Lutomirski for their help in rendering
+this article human readable, and to Michelle Rankin for her support
+of this effort.
+Other contributions are acknowledged in the Linux kernel's git archive.
+The cartoon is copyright (c) 2013 by Melissa Broussard,
+and is provided
+under the terms of the Creative Commons Attribution-Share Alike 3.0
+United States license.
+
+<h3><a name="Answers to Quick Quizzes">
+Answers to Quick Quizzes</a></h3>
+
+<a name="qq1answer"></a>
+<p><b>Quick Quiz 1</b>:
+Wait a minute!
+You said that updaters can make useful forward progress concurrently
+with readers, but pre-existing readers will block
+<tt>synchronize_rcu()</tt>!!!
+Just who are you trying to fool???
+
+
+</p><p><b>Answer</b>:
+First, if updaters do not wish to be blocked by readers, they can use
+<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
+be discussed later.
+Second, even when using <tt>synchronize_rcu()</tt>, the other
+update-side code does run concurrently with readers, whether pre-existing
+or not.
+
+
+</p><p><a href="#Quick%20Quiz%201"><b>Back to Quick Quiz 1</b>.</a>
+
+<a name="qq2answer"></a>
+<p><b>Quick Quiz 2</b>:
+Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
+
+
+</p><p><b>Answer</b>:
+Without that extra grace period, memory reordering could result in
+<tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
+concurrently with the last bits of <tt>recovery()</tt>.
+
+
+</p><p><a href="#Quick%20Quiz%202"><b>Back to Quick Quiz 2</b>.</a>
+
+<a name="qq3answer"></a>
+<p><b>Quick Quiz 3</b>:
+But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
+two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
+from being reordered.
+Can't that also cause problems?
+
+
+</p><p><b>Answer</b>:
+No, it cannot.
+The readers cannot see either of these two fields until
+the assignment to <tt>gp</tt>, by which time both fields are
+fully initialized.
+So reordering the assignments
+to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt> cannot possibly
+cause any problems.
+
+
+</p><p><a href="#Quick%20Quiz%203"><b>Back to Quick Quiz 3</b>.</a>
+
+<a name="qq4answer"></a>
+<p><b>Quick Quiz 4</b>:
+Without the <tt>rcu_dereference()</tt> or the
+<tt>rcu_access_pointer()</tt>, what destructive optimizations
+might the compiler make use of?
+
+
+</p><p><b>Answer</b>:
+Let's start with what happens to <tt>do_something_gp()</tt>
+if it fails to use <tt>rcu_dereference()</tt>.
+It could reuse a value formerly fetched from this same pointer.
+It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
+manner, resulting in <i>load tearing</i>, in turn resulting in a bytewise
+mash-up of two distinct pointer values.
+It might even use value-speculation optimizations, where it makes a wrong
+guess, but by the time it gets around to checking the value, an update
+has changed the pointer to match the wrong guess.
+Too bad about any dereferences that returned pre-initialization garbage
+in the meantime!
+
+<p>
+For <tt>remove_gp_synchronous()</tt>, as long as all modifications
+to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
+the above optimizations are harmless.
+However,
+with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
+<tt>sparse</tt> will complain if you
+define <tt>gp</tt> with <tt>__rcu</tt> and then
+access it without using
+either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
+
+
+</p><p><a href="#Quick%20Quiz%204"><b>Back to Quick Quiz 4</b>.</a>
+
+<a name="qq5answer"></a>
+<p><b>Quick Quiz 5</b>:
+Given that multiple CPUs can start RCU read-side critical sections
+at any time without any ordering whatsoever, how can RCU possibly tell whether
+or not a given RCU read-side critical section starts before a
+given instance of <tt>synchronize_rcu()</tt>?
+
+
+</p><p><b>Answer</b>:
+If RCU cannot tell whether or not a given
+RCU read-side critical section starts before a
+given instance of <tt>synchronize_rcu()</tt>,
+then it must assume that the RCU read-side critical section
+started first.
+In other words, a given instance of <tt>synchronize_rcu()</tt>
+can avoid waiting on a given RCU read-side critical section only
+if it can prove that <tt>synchronize_rcu()</tt> started first.
+
+
+</p><p><a href="#Quick%20Quiz%205"><b>Back to Quick Quiz 5</b>.</a>
+
+<a name="qq6answer"></a>
+<p><b>Quick Quiz 6</b>:
+The first and second guarantees require unbelievably strict ordering!
+Are all these memory barriers <i>really</i> required?
+
+
+</p><p><b>Answer</b>:
+Yes, they really are required.
+To see why the first guarantee is required, consider the following
+sequence of events:
+
+<ol>
+<li> CPU 1: <tt>rcu_read_lock()</tt>
+<li> CPU 1: <tt>q = rcu_dereference(gp);
+ /* Very likely to return p. */</tt>
+<li> CPU 0: <tt>list_del_rcu(p);</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
+<li> CPU 1: <tt>do_something_with(q-&gt;a);
+ /* No smp_mb(), so might happen after kfree(). */</tt>
+<li> CPU 1: <tt>rcu_read_unlock()</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
+<li> CPU 0: <tt>kfree(p);</tt>
+</ol>
+
+<p>
+Therefore, there absolutely must be a full memory barrier between the
+end of the RCU read-side critical section and the end of the
+grace period.
+
+<p>
+The sequence of events demonstrating the necessity of the second rule
+is roughly similar:
+
+<ol>
+<li> CPU 0: <tt>list_del_rcu(p);</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
+<li> CPU 1: <tt>rcu_read_lock()</tt>
+<li> CPU 1: <tt>q = rcu_dereference(gp);
+ /* Might return p if no memory barrier. */</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
+<li> CPU 0: <tt>kfree(p);</tt>
+<li> CPU 1: <tt>do_something_with(q-&gt;a); /* Boom!!! */</tt>
+<li> CPU 1: <tt>rcu_read_unlock()</tt>
+</ol>
+
+<p>
+And similarly, without a memory barrier between the beginning of the
+grace period and the beginning of the RCU read-side critical section,
+CPU&nbsp;1 might end up accessing the freelist.
+
+<p>
+The &ldquo;as if&rdquo; rule of course applies, so that any implementation
+that acts as if the appropriate memory barriers were in place is a
+correct implementation.
+That said, it is much easier to fool yourself into believing that you have
+adhered to the as-if rule than it is to actually adhere to it!
+
+
+</p><p><a href="#Quick%20Quiz%206"><b>Back to Quick Quiz 6</b>.</a>
+
+<a name="qq7answer"></a>
+<p><b>Quick Quiz 7</b>:
+But how does the upgrade-to-write operation exclude other readers?
+
+
+</p><p><b>Answer</b>:
+It doesn't, just like normal RCU updates, which also do not exclude
+RCU readers.
+
+
+</p><p><a href="#Quick%20Quiz%207"><b>Back to Quick Quiz 7</b>.</a>
+
+<a name="qq8answer"></a>
+<p><b>Quick Quiz 8</b>:
+Can't the compiler also reorder this code?
+
+
+</p><p><b>Answer</b>:
+No, the volatile casts in <tt>READ_ONCE()</tt> and
+<tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
+this particular case.
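+
+<p>
+To see why, consider the following simplified sketch of these two
+macros; the Linux kernel's actual definitions are more elaborate,
+but rely on the same volatile casts:
+
+<blockquote>
+<pre>
+#define READ_ONCE(x)     (*(volatile typeof(x) *)&amp;(x))
+#define WRITE_ONCE(x, v) (*(volatile typeof(x) *)&amp;(x) = (v))
+</pre>
+</blockquote>
+
+<p>
+Because the compiler is not permitted to reorder volatile accesses
+with respect to each other, each thread's loads and stores must be
+emitted in program order.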
+
+
+</p><p><a href="#Quick%20Quiz%208"><b>Back to Quick Quiz 8</b>.</a>
+
+<a name="qq9answer"></a>
+<p><b>Quick Quiz 9</b>:
+Suppose that <tt>synchronize_rcu()</tt> did wait until all readers had completed.
+Would the updater be able to rely on this?
+
+
+</p><p><b>Answer</b>:
+No.
+Even if <tt>synchronize_rcu()</tt> were to wait until
+all readers had completed, a new reader might start immediately after
+<tt>synchronize_rcu()</tt> completed.
+Therefore, the code following
+<tt>synchronize_rcu()</tt> cannot rely on there being no readers
+in any case.
+
+
+</p><p><a href="#Quick%20Quiz%209"><b>Back to Quick Quiz 9</b>.</a>
+
+<a name="qq10answer"></a>
+<p><b>Quick Quiz 10</b>:
+How long a sequence of grace periods, each separated by an RCU read-side
+critical section, would be required to partition the RCU read-side
+critical sections at the beginning and end of the chain?
+
+
+</p><p><b>Answer</b>:
+In theory, an infinite number.
+In practice, an unknown number that is sensitive to both implementation
+details and timing considerations.
+Therefore, even in practice, RCU users must abide by the theoretical rather
+than the practical answer.
+
+
+</p><p><a href="#Quick%20Quiz%2010"><b>Back to Quick Quiz 10</b>.</a>
+
+<a name="qq11answer"></a>
+<p><b>Quick Quiz 11</b>:
+What about sleeping locks?
+
+
+</p><p><b>Answer</b>:
+These are forbidden within Linux-kernel RCU read-side critical sections
+because it is not legal to place a quiescent state (in this case,
+voluntary context switch) within an RCU read-side critical section.
+However, sleeping locks may be used within userspace RCU read-side critical
+sections, and also within Linux-kernel sleepable RCU
+<a href="#Sleepable RCU">(SRCU)</a>
+read-side critical sections.
+In addition, the -rt patchset turns spinlocks into sleeping locks so
+that the corresponding critical sections can be preempted, which
+also means that these sleeplockified spinlocks (but not other sleeping locks!)
+may be acquired within -rt-Linux-kernel RCU read-side critical sections.
+
+<p>
+Note that it <i>is</i> legal for a normal RCU read-side critical section
+to conditionally acquire a sleeping lock (as in <tt>mutex_trylock()</tt>),
+but only as long as it does not loop indefinitely attempting to
+conditionally acquire that sleeping lock.
+The key point is that things like <tt>mutex_trylock()</tt>
+either return with the mutex held, or return an error indication if
+the mutex was not immediately available.
+Either way, <tt>mutex_trylock()</tt> returns immediately without sleeping.
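+
+<p>
+For example, the following sketch (with a hypothetical <tt>gp</tt>
+pointer and a hypothetical <tt>update_stats()</tt> function)
+conditionally acquires a mutex without ever sleeping:
+
+<blockquote>
+<pre>
+ 1 rcu_read_lock();
+ 2 p = rcu_dereference(gp);
+ 3 if (p &amp;&amp; mutex_trylock(&amp;p-&gt;lock)) {
+ 4   update_stats(p); /* Mutex acquired without sleeping. */
+ 5   mutex_unlock(&amp;p-&gt;lock);
+ 6 }
+ 7 rcu_read_unlock();
+</pre>
+</blockquote>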
+
+
+</p><p><a href="#Quick%20Quiz%2011"><b>Back to Quick Quiz 11</b>.</a>
+
+<a name="qq12answer"></a>
+<p><b>Quick Quiz 12</b>:
+Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
+After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
+structure, which would interact badly with concurrent insertions.
+Doesn't this mean that <tt>rcu_dereference()</tt> is required?
+
+
+</p><p><b>Answer</b>:
+Presumably the <tt>-&gt;gp_lock</tt> acquired on line&nbsp;18 excludes
+any changes, including any insertions that <tt>rcu_dereference()</tt>
+would protect against.
+Therefore, any insertions will be delayed until after <tt>-&gt;gp_lock</tt>
+is released on line&nbsp;25, which in turn means that
+<tt>rcu_access_pointer()</tt> suffices.
+
+
+</p><p><a href="#Quick%20Quiz%2012"><b>Back to Quick Quiz 12</b>.</a>
+
+<a name="qq13answer"></a>
+<p><b>Quick Quiz 13</b>:
+Earlier it was claimed that <tt>call_rcu()</tt> and
+<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
+by readers.
+But how can that be correct, given that the invocation of the callback
+and the freeing of the memory (respectively) must still wait for
+a grace period to elapse?
+
+
+</p><p><b>Answer</b>:
+We could define things this way, but keep in mind that this sort of
+definition would say that updates in garbage-collected languages
+cannot complete until the next time the garbage collector runs,
+which does not seem at all reasonable.
+The key point is that in most cases, an updater using either
+<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
+next update as soon as it has invoked <tt>call_rcu()</tt> or
+<tt>kfree_rcu()</tt>, without having to wait for a subsequent
+grace period.
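+
+<p>
+For example, the following sketch removes an element and schedules its
+eventual freeing without waiting, assuming a hypothetical
+<tt>struct foo</tt> containing an <tt>rcu_head</tt> field named
+<tt>rh</tt>:
+
+<blockquote>
+<pre>
+ 1 static void free_foo_cb(struct rcu_head *rhp)
+ 2 {
+ 3   kfree(container_of(rhp, struct foo, rh));
+ 4 }
+ 5
+ 6 /* Updater: */
+ 7 rcu_assign_pointer(gp, NULL);
+ 8 call_rcu(&amp;p-&gt;rh, free_foo_cb);
+ 9 /* Proceed immediately; free_foo_cb() runs after a grace period. */
+</pre>
+</blockquote>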
+
+
+</p><p><a href="#Quick%20Quiz%2013"><b>Back to Quick Quiz 13</b>.</a>
+
+<a name="qq14answer"></a>
+<p><b>Quick Quiz 14</b>:
+So what happens with <tt>synchronize_rcu()</tt> during
+scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
+kernels?
+
+
+</p><p><b>Answer</b>:
+In <tt>CONFIG_PREEMPT=n</tt> kernels, <tt>synchronize_rcu()</tt>
+maps directly to <tt>synchronize_sched()</tt>.
+Therefore, <tt>synchronize_rcu()</tt> works normally throughout
+boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
+However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
+so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
+during scheduler initialization.
+
+
+</p><p><a href="#Quick%20Quiz%2014"><b>Back to Quick Quiz 14</b>.</a>
+
+
+</body></html>
diff --git a/Documentation/RCU/Design/Requirements/Requirements.htmlx b/Documentation/RCU/Design/Requirements/Requirements.htmlx
new file mode 100644
index 000000000000..3a97ba490c42
--- /dev/null
+++ b/Documentation/RCU/Design/Requirements/Requirements.htmlx
@@ -0,0 +1,2741 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+ "http://www.w3.org/TR/html4/loose.dtd">
+ <html>
+ <head><title>A Tour Through RCU's Requirements [LWN.net]</title>
+ <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+
+<h1>A Tour Through RCU's Requirements</h1>
+
+<p>Copyright IBM Corporation, 2015</p>
+<p>Author: Paul E.&nbsp;McKenney</p>
+<p><i>The initial version of this document appeared in the
+<a href="https://lwn.net/">LWN</a> articles
+<a href="https://lwn.net/Articles/652156/">here</a>,
+<a href="https://lwn.net/Articles/652677/">here</a>, and
+<a href="https://lwn.net/Articles/653326/">here</a>.</i></p>
+
+<h2>Introduction</h2>
+
+<p>
+Read-copy update (RCU) is a synchronization mechanism that is often
+used as a replacement for reader-writer locking.
+RCU is unusual in that updaters do not block readers,
+which means that RCU's read-side primitives can be exceedingly fast
+and scalable.
+In addition, updaters can make useful forward progress concurrently
+with readers.
+However, all this concurrency between RCU readers and updaters does raise
+the question of exactly what RCU readers are doing, which in turn
+raises the question of exactly what RCU's requirements are.
+
+<p>
+This document therefore summarizes RCU's requirements, and can be thought
+of as an informal, high-level specification for RCU.
+It is important to understand that RCU's specification is primarily
+empirical in nature;
+in fact, I learned about many of these requirements the hard way.
+This situation might cause some consternation. However, not only
+has this learning process been a lot of fun, but it has also been
+a great privilege to work with so many people willing to apply
+technologies in interesting new ways.
+
+<p>
+All that aside, here are the categories of currently known RCU requirements:
+</p>
+
+<ol>
+<li> <a href="#Fundamental Requirements">
+ Fundamental Requirements</a>
+<li> <a href="#Fundamental Non-Requirements">Fundamental Non-Requirements</a>
+<li> <a href="#Parallelism Facts of Life">
+ Parallelism Facts of Life</a>
+<li> <a href="#Quality-of-Implementation Requirements">
+ Quality-of-Implementation Requirements</a>
+<li> <a href="#Linux Kernel Complications">
+ Linux Kernel Complications</a>
+<li> <a href="#Software-Engineering Requirements">
+ Software-Engineering Requirements</a>
+<li> <a href="#Other RCU Flavors">
+ Other RCU Flavors</a>
+<li> <a href="#Possible Future Changes">
+ Possible Future Changes</a>
+</ol>
+
+<p>
+This is followed by a <a href="#Summary">summary</a>,
+which is in turn followed by the inevitable
+<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
+
+<h2><a name="Fundamental Requirements">Fundamental Requirements</a></h2>
+
+<p>
+RCU's fundamental requirements are the closest thing RCU has to hard
+mathematical requirements.
+These are:
+
+<ol>
+<li> <a href="#Grace-Period Guarantee">
+ Grace-Period Guarantee</a>
+<li> <a href="#Publish-Subscribe Guarantee">
+ Publish-Subscribe Guarantee</a>
+<li> <a href="#Memory-Barrier Guarantees">
+ Memory-Barrier Guarantees</a>
+<li> <a href="#RCU Primitives Guaranteed to Execute Unconditionally">
+ RCU Primitives Guaranteed to Execute Unconditionally</a>
+<li> <a href="#Guaranteed Read-to-Write Upgrade">
+ Guaranteed Read-to-Write Upgrade</a>
+</ol>
+
+<h3><a name="Grace-Period Guarantee">Grace-Period Guarantee</a></h3>
+
+<p>
+RCU's grace-period guarantee is unusual in being premeditated:
+Jack Slingwine and I had this guarantee firmly in mind when we started
+work on RCU (then called &ldquo;rclock&rdquo;) in the early 1990s.
+That said, the past two decades of experience with RCU have produced
+a much more detailed understanding of this guarantee.
+
+<p>
+RCU's grace-period guarantee allows updaters to wait for the completion
+of all pre-existing RCU read-side critical sections.
+An RCU read-side critical section
+begins with the marker <tt>rcu_read_lock()</tt> and ends with
+the marker <tt>rcu_read_unlock()</tt>.
+These markers may be nested, and RCU treats a nested set as one
+big RCU read-side critical section.
+Production-quality implementations of <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> are extremely lightweight, and in
+fact have exactly zero overhead in Linux kernels built for production
+use with <tt>CONFIG_PREEMPT=n</tt>.
+
+<p>
+This guarantee allows ordering to be enforced with extremely low
+overhead to readers, for example:
+
+<blockquote>
+<pre>
+ 1 int x, y;
+ 2
+ 3 void thread0(void)
+ 4 {
+ 5 rcu_read_lock();
+ 6 r1 = READ_ONCE(x);
+ 7 r2 = READ_ONCE(y);
+ 8 rcu_read_unlock();
+ 9 }
+10
+11 void thread1(void)
+12 {
+13 WRITE_ONCE(x, 1);
+14 synchronize_rcu();
+15 WRITE_ONCE(y, 1);
+16 }
+</pre>
+</blockquote>
+
+<p>
+Because the <tt>synchronize_rcu()</tt> on line&nbsp;14 waits for
+all pre-existing readers, any instance of <tt>thread0()</tt> that
+loads a value of zero from <tt>x</tt> must complete before
+<tt>thread1()</tt> stores to <tt>y</tt>, so that instance must
+also load a value of zero from <tt>y</tt>.
+Similarly, any instance of <tt>thread0()</tt> that loads a value of
+one from <tt>y</tt> must have started after the
+<tt>synchronize_rcu()</tt> started, and must therefore also load
+a value of one from <tt>x</tt>.
+Therefore, the outcome:
+<blockquote>
+<pre>
+(r1 == 0 &amp;&amp; r2 == 1)
+</pre>
+</blockquote>
+cannot happen.
+
+<p>@@QQ@@
+Wait a minute!
+You said that updaters can make useful forward progress concurrently
+with readers, but pre-existing readers will block
+<tt>synchronize_rcu()</tt>!!!
+Just who are you trying to fool???
+<p>@@QQA@@
+First, if updaters do not wish to be blocked by readers, they can use
+<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
+be discussed later.
+Second, even when using <tt>synchronize_rcu()</tt>, the other
+update-side code does run concurrently with readers, whether pre-existing
+or not.
+<p>@@QQE@@
+
+<p>
+This scenario resembles one of the first uses of RCU in
+<a href="https://en.wikipedia.org/wiki/DYNIX">DYNIX/ptx</a>,
+which managed a distributed lock manager's transition into
+a state suitable for handling recovery from node failure,
+more or less as follows:
+
+<blockquote>
+<pre>
+ 1 #define STATE_NORMAL 0
+ 2 #define STATE_WANT_RECOVERY 1
+ 3 #define STATE_RECOVERING 2
+ 4 #define STATE_WANT_NORMAL 3
+ 5
+ 6 int state = STATE_NORMAL;
+ 7
+ 8 void do_something_dlm(void)
+ 9 {
+10 int state_snap;
+11
+12 rcu_read_lock();
+13 state_snap = READ_ONCE(state);
+14 if (state_snap == STATE_NORMAL)
+15 do_something();
+16 else
+17 do_something_carefully();
+18 rcu_read_unlock();
+19 }
+20
+21 void start_recovery(void)
+22 {
+23 WRITE_ONCE(state, STATE_WANT_RECOVERY);
+24 synchronize_rcu();
+25 WRITE_ONCE(state, STATE_RECOVERING);
+26 recovery();
+27 WRITE_ONCE(state, STATE_WANT_NORMAL);
+28 synchronize_rcu();
+29 WRITE_ONCE(state, STATE_NORMAL);
+30 }
+</pre>
+</blockquote>
+
+<p>
+The RCU read-side critical section in <tt>do_something_dlm()</tt>
+works with the <tt>synchronize_rcu()</tt> in <tt>start_recovery()</tt>
+to guarantee that <tt>do_something()</tt> never runs concurrently
+with <tt>recovery()</tt>, but with little or no synchronization
+overhead in <tt>do_something_dlm()</tt>.
+
+<p>@@QQ@@
+Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
+<p>@@QQA@@
+Without that extra grace period, memory reordering could result in
+<tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
+concurrently with the last bits of <tt>recovery()</tt>.
+<p>@@QQE@@
+
+<p>
+In order to avoid fatal problems such as deadlocks,
+an RCU read-side critical section must not contain calls to
+<tt>synchronize_rcu()</tt>.
+Similarly, an RCU read-side critical section must not
+contain anything that waits, directly or indirectly, on completion of
+an invocation of <tt>synchronize_rcu()</tt>.
+
+<p>
+Although RCU's grace-period guarantee is useful in and of itself, with
+<a href="https://lwn.net/Articles/573497/">quite a few use cases</a>,
+it would be good to be able to use RCU to coordinate read-side
+access to linked data structures.
+For this, the grace-period guarantee is not sufficient, as can
+be seen in function <tt>add_gp_buggy()</tt> below.
+We will look at the reader's code later, but in the meantime, just think of
+the reader as locklessly picking up the <tt>gp</tt> pointer,
+and, if the value loaded is non-<tt>NULL</tt>, locklessly accessing the
+<tt>-&gt;a</tt> and <tt>-&gt;b</tt> fields.
+
+<blockquote>
+<pre>
+ 1 bool add_gp_buggy(int a, int b)
+ 2 {
+ 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4 if (!p)
+ 5 return false;
+ 6 spin_lock(&amp;gp_lock);
+ 7 if (rcu_access_pointer(gp)) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+11 p-&gt;a = a;
+12 p-&gt;b = b;
+13 gp = p; /* ORDERING BUG */
+14 spin_unlock(&amp;gp_lock);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+The problem is that both the compiler and weakly ordered CPUs are within
+their rights to reorder this code as follows:
+
+<blockquote>
+<pre>
+ 1 bool add_gp_buggy_optimized(int a, int b)
+ 2 {
+ 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4 if (!p)
+ 5 return false;
+ 6 spin_lock(&amp;gp_lock);
+ 7 if (rcu_access_pointer(gp)) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+<b>11 gp = p; /* ORDERING BUG */
+12 p-&gt;a = a;
+13 p-&gt;b = b;</b>
+14 spin_unlock(&amp;gp_lock);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+If an RCU reader fetches <tt>gp</tt> just after
+<tt>add_gp_buggy_optimized</tt> executes line&nbsp;11,
+it will see garbage in the <tt>-&gt;a</tt> and <tt>-&gt;b</tt>
+fields.
+And this is but one of many ways in which compiler and hardware optimizations
+could cause trouble.
+Therefore, we clearly need some way to prevent the compiler and the CPU from
+reordering in this manner, which brings us to the publish-subscribe
+guarantee discussed in the next section.
+
+<h3><a name="Publish-Subscribe Guarantee">Publish/Subscribe Guarantee</a></h3>
+
+<p>
+RCU's publish-subscribe guarantee allows data to be inserted
+into a linked data structure without disrupting RCU readers.
+The updater uses <tt>rcu_assign_pointer()</tt> to insert the
+new data, and readers use <tt>rcu_dereference()</tt> to
+access data, whether new or old.
+The following shows an example of insertion:
+
+<blockquote>
+<pre>
+ 1 bool add_gp(int a, int b)
+ 2 {
+ 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4 if (!p)
+ 5 return false;
+ 6 spin_lock(&amp;gp_lock);
+ 7 if (rcu_access_pointer(gp)) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+11 p-&gt;a = a;
+12 p-&gt;b = b;
+13 rcu_assign_pointer(gp, p);
+14 spin_unlock(&amp;gp_lock);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+The <tt>rcu_assign_pointer()</tt> on line&nbsp;13 is conceptually
+equivalent to a simple assignment statement, but also guarantees
+that its assignment will
+happen after the two assignments in lines&nbsp;11 and&nbsp;12,
+similar to the C11 <tt>memory_order_release</tt> store operation.
+It also prevents any number of &ldquo;interesting&rdquo; compiler
+optimizations, for example, the use of <tt>gp</tt> as a scratch
+location immediately preceding the assignment.
+
+<p>@@QQ@@
+But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
+two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
+from being reordered.
+Can't that also cause problems?
+<p>@@QQA@@
+No, it cannot.
+The readers cannot see either of these two fields until
+the assignment to <tt>gp</tt>, by which time both fields are
+fully initialized.
+So reordering the assignments
+to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt> cannot possibly
+cause any problems.
+<p>@@QQE@@
+
+<p>
+It is tempting to assume that the reader need not do anything special
+to control its accesses to the RCU-protected data,
+as shown in <tt>do_something_gp_buggy()</tt> below:
+
+<blockquote>
+<pre>
+ 1 bool do_something_gp_buggy(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 p = gp; /* OPTIMIZATIONS GALORE!!! */
+ 5 if (p) {
+ 6 do_something(p-&gt;a, p-&gt;b);
+ 7 rcu_read_unlock();
+ 8 return true;
+ 9 }
+10 rcu_read_unlock();
+11 return false;
+12 }
+</pre>
+</blockquote>
+
+<p>
+However, this temptation must be resisted because there are a
+surprisingly large number of ways that the compiler
+(to say nothing of
+<a href="https://h71000.www7.hp.com/wizard/wiz_2637.html">DEC Alpha CPUs</a>)
+can trip this code up.
+For but one example, if the compiler were short of registers, it
+might choose to refetch from <tt>gp</tt> rather than keeping
+a separate copy in <tt>p</tt> as follows:
+
+<blockquote>
+<pre>
+ 1 bool do_something_gp_buggy_optimized(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 if (gp) { /* OPTIMIZATIONS GALORE!!! */
+<b> 5 do_something(gp-&gt;a, gp-&gt;b);</b>
+ 6 rcu_read_unlock();
+ 7 return true;
+ 8 }
+ 9 rcu_read_unlock();
+10 return false;
+11 }
+</pre>
+</blockquote>
+
+<p>
+If this function ran concurrently with a series of updates that
+replaced the current structure with a new one,
+the fetches of <tt>gp-&gt;a</tt>
+and <tt>gp-&gt;b</tt> might well come from two different structures,
+which could cause serious confusion.
+To prevent this (and much else besides), <tt>do_something_gp()</tt> uses
+<tt>rcu_dereference()</tt> to fetch from <tt>gp</tt>:
+
+<blockquote>
+<pre>
+ 1 bool do_something_gp(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 p = rcu_dereference(gp);
+ 5 if (p) {
+ 6 do_something(p-&gt;a, p-&gt;b);
+ 7 rcu_read_unlock();
+ 8 return true;
+ 9 }
+10 rcu_read_unlock();
+11 return false;
+12 }
+</pre>
+</blockquote>
+
+<p>
+The <tt>rcu_dereference()</tt> uses volatile casts and (for DEC Alpha)
+memory barriers in the Linux kernel.
+Should a
+<a href="http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf">high-quality implementation of C11 <tt>memory_order_consume</tt> [PDF]</a>
+ever appear, then <tt>rcu_dereference()</tt> could be implemented
+as a <tt>memory_order_consume</tt> load.
+Regardless of the exact implementation, a pointer fetched by
+<tt>rcu_dereference()</tt> may not be used outside of the
+outermost RCU read-side critical section containing that
+<tt>rcu_dereference()</tt>, unless protection of
+the corresponding data element has been passed from RCU to some
+other synchronization mechanism, most commonly locking or
+<a href="https://www.kernel.org/doc/Documentation/RCU/rcuref.txt">reference counting</a>.
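+
+<p>
+For reference, here is a simplified sketch of such a
+dependency-ordered load; the Linux kernel's actual
+<tt>rcu_dereference()</tt> adds lockdep checking and other details:
+
+<blockquote>
+<pre>
+ 1 #define rcu_dereference_sketch(p) \
+ 2 ({ \
+ 3   typeof(p) p1 = READ_ONCE(p); \
+ 4   smp_read_barrier_depends(); /* No-op except on DEC Alpha. */ \
+ 5   p1; \
+ 6 })
+</pre>
+</blockquote>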
+
+<p>
+In short, updaters use <tt>rcu_assign_pointer()</tt> and readers
+use <tt>rcu_dereference()</tt>, and these two RCU API elements
+work together to ensure that readers have a consistent view of
+newly added data elements.
+
+<p>
+Of course, it is also necessary to remove elements from RCU-protected
+data structures, for example, using the following process:
+
+<ol>
+<li> Remove the data element from the enclosing structure.
+<li> Wait for all pre-existing RCU read-side critical sections
+ to complete (because only pre-existing readers can possibly have
+ a reference to the newly removed data element).
+<li> At this point, only the updater has a reference to the
+ newly removed data element, so it can safely reclaim
+ the data element, for example, by passing it to <tt>kfree()</tt>.
+</ol>
+
+This process is implemented by <tt>remove_gp_synchronous()</tt>:
+
+<blockquote>
+<pre>
+ 1 bool remove_gp_synchronous(void)
+ 2 {
+ 3 struct foo *p;
+ 4
+ 5 spin_lock(&amp;gp_lock);
+ 6 p = rcu_access_pointer(gp);
+ 7 if (!p) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+11 rcu_assign_pointer(gp, NULL);
+12 spin_unlock(&amp;gp_lock);
+13 synchronize_rcu();
+14 kfree(p);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+This function is straightforward, with line&nbsp;13 waiting for a grace
+period before line&nbsp;14 frees the old data element.
+This waiting ensures that readers will reach line&nbsp;7 of
+<tt>do_something_gp()</tt> before the data element referenced by
+<tt>p</tt> is freed.
+The <tt>rcu_access_pointer()</tt> on line&nbsp;6 is similar to
+<tt>rcu_dereference()</tt>, except that:
+
+<ol>
+<li> The value returned by <tt>rcu_access_pointer()</tt>
+ cannot be dereferenced.
+ If you want to access the value pointed to as well as
+ the pointer itself, use <tt>rcu_dereference()</tt>
+ instead of <tt>rcu_access_pointer()</tt>.
+<li> The call to <tt>rcu_access_pointer()</tt> need not be
+ protected.
+ In contrast, <tt>rcu_dereference()</tt> must either be
+ within an RCU read-side critical section or in a code
+ segment where the pointer cannot change, for example, in
+ code protected by the corresponding update-side lock.
+</ol>
+
+<p>@@QQ@@
+Without the <tt>rcu_dereference()</tt> or the
+<tt>rcu_access_pointer()</tt>, what destructive optimizations
+might the compiler make use of?
+<p>@@QQA@@
+Let's start with what happens to <tt>do_something_gp()</tt>
+if it fails to use <tt>rcu_dereference()</tt>.
+It could reuse a value formerly fetched from this same pointer.
+It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
+manner, resulting in <i>load tearing</i>, in turn resulting in a bytewise
+mash-up of two distinct pointer values.
+It might even use value-speculation optimizations, where it makes a wrong
+guess, but by the time it gets around to checking the value, an update
+has changed the pointer to match the wrong guess.
+Too bad about any dereferences that returned pre-initialization garbage
+in the meantime!
+
+<p>
+For <tt>remove_gp_synchronous()</tt>, as long as all modifications
+to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
+the above optimizations are harmless.
+However,
+with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
+<tt>sparse</tt> will complain if you
+define <tt>gp</tt> with <tt>__rcu</tt> and then
+access it without using
+either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
+<p>@@QQE@@
+
+<p>
+In short, RCU's publish-subscribe guarantee is provided by the combination
+of <tt>rcu_assign_pointer()</tt> and <tt>rcu_dereference()</tt>.
+This guarantee allows data elements to be safely added to RCU-protected
+linked data structures without disrupting RCU readers.
+This guarantee can be used in combination with the grace-period
+guarantee to also allow data elements to be removed from RCU-protected
+linked data structures, again without disrupting RCU readers.
+
+<p>
+This guarantee was only partially premeditated.
+DYNIX/ptx used an explicit memory barrier for publication, but had nothing
+resembling <tt>rcu_dereference()</tt> for subscription, nor did it
+have anything resembling the <tt>smp_read_barrier_depends()</tt>
+that was later subsumed into <tt>rcu_dereference()</tt>.
+The need for these operations made itself known quite suddenly at a
+late-1990s meeting with the DEC Alpha architects, back in the days when
+DEC was still a free-standing company.
+It took the Alpha architects a good hour to convince me that any sort
+of barrier would ever be needed, and it then took me a good <i>two</i> hours
+to convince them that their documentation did not make this point clear.
+More recent work with the C and C++ standards committees has provided
+much education on tricks and traps from the compiler.
+In short, compilers were much less tricky in the early 1990s, but in
+2015, don't even think about omitting <tt>rcu_dereference()</tt>!
+
+<h3><a name="Memory-Barrier Guarantees">Memory-Barrier Guarantees</a></h3>
+
+<p>
+The previous section's simple linked-data-structure scenario clearly
+demonstrates the need for RCU's stringent memory-ordering guarantees on
+systems with more than one CPU:
+
+<ol>
+<li> Each CPU that has an RCU read-side critical section that
+ begins before <tt>synchronize_rcu()</tt> starts is
+ guaranteed to execute a full memory barrier between the time
+ that the RCU read-side critical section ends and the time that
+ <tt>synchronize_rcu()</tt> returns.
+ Without this guarantee, a pre-existing RCU read-side critical section
+ might hold a reference to the newly removed <tt>struct foo</tt>
+ after the <tt>kfree()</tt> on line&nbsp;14 of
+ <tt>remove_gp_synchronous()</tt>.
+<li> Each CPU that has an RCU read-side critical section that ends
+ after <tt>synchronize_rcu()</tt> returns is guaranteed
+ to execute a full memory barrier between the time that
+ <tt>synchronize_rcu()</tt> begins and the time that the RCU
+ read-side critical section begins.
+ Without this guarantee, a later RCU read-side critical section
+ running after the <tt>kfree()</tt> on line&nbsp;14 of
+ <tt>remove_gp_synchronous()</tt> might
+ later run <tt>do_something_gp()</tt> and find the
+ newly deleted <tt>struct foo</tt>.
+<li> If the task invoking <tt>synchronize_rcu()</tt> remains
+ on a given CPU, then that CPU is guaranteed to execute a full
+ memory barrier sometime during the execution of
+ <tt>synchronize_rcu()</tt>.
+ This guarantee ensures that the <tt>kfree()</tt> on
+ line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
+ execute after the removal on line&nbsp;11.
+<li> If the task invoking <tt>synchronize_rcu()</tt> migrates
+ among a group of CPUs during that invocation, then each of the
+ CPUs in that group is guaranteed to execute a full memory barrier
+ sometime during the execution of <tt>synchronize_rcu()</tt>.
+ This guarantee also ensures that the <tt>kfree()</tt> on
+ line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
+ execute after the removal on
+	line&nbsp;11, even in the case where the thread executing the
+ <tt>synchronize_rcu()</tt> migrates in the meantime.
+</ol>
+
+<p>@@QQ@@
+Given that multiple CPUs can start RCU read-side critical sections
+at any time without any ordering whatsoever, how can RCU possibly tell whether
+or not a given RCU read-side critical section starts before a
+given instance of <tt>synchronize_rcu()</tt>?
+<p>@@QQA@@
+If RCU cannot tell whether or not a given
+RCU read-side critical section starts before a
+given instance of <tt>synchronize_rcu()</tt>,
+then it must assume that the RCU read-side critical section
+started first.
+In other words, a given instance of <tt>synchronize_rcu()</tt>
+can avoid waiting on a given RCU read-side critical section only
+if it can prove that <tt>synchronize_rcu()</tt> started first.
+<p>@@QQE@@
+
+<p>@@QQ@@
+The first and second guarantees require unbelievably strict ordering!
+Are all these memory barriers <i>really</i> required?
+<p>@@QQA@@
+Yes, they really are required.
+To see why the first guarantee is required, consider the following
+sequence of events:
+
+<ol>
+<li> CPU 1: <tt>rcu_read_lock()</tt>
+<li> CPU 1: <tt>q = rcu_dereference(gp);
+ /* Very likely to return p. */</tt>
+<li> CPU 0: <tt>list_del_rcu(p);</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
+<li> CPU 1: <tt>do_something_with(q-&gt;a);
+ /* No smp_mb(), so might happen after kfree(). */</tt>
+<li> CPU 1: <tt>rcu_read_unlock()</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
+<li> CPU 0: <tt>kfree(p);</tt>
+</ol>
+
+<p>
+Therefore, there absolutely must be a full memory barrier between the
+end of the RCU read-side critical section and the end of the
+grace period.
+
+<p>
+The sequence of events demonstrating the necessity of the second rule
+is roughly similar:
+
+<ol>
+<li> CPU 0: <tt>list_del_rcu(p);</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
+<li> CPU 1: <tt>rcu_read_lock()</tt>
+<li> CPU 1: <tt>q = rcu_dereference(gp);
+ /* Might return p if no memory barrier. */</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
+<li> CPU 0: <tt>kfree(p);</tt>
+<li> CPU 1: <tt>do_something_with(q-&gt;a); /* Boom!!! */</tt>
+<li> CPU 1: <tt>rcu_read_unlock()</tt>
+</ol>
+
+<p>
+And similarly, without a memory barrier between the beginning of the
+grace period and the beginning of the RCU read-side critical section,
+CPU&nbsp;1 might end up accessing the freelist.
+
+<p>
+The &ldquo;as if&rdquo; rule of course applies, so that any implementation
+that acts as if the appropriate memory barriers were in place is a
+correct implementation.
+That said, it is much easier to fool yourself into believing that you have
+adhered to the as-if rule than it is to actually adhere to it!
+<p>@@QQE@@
+
+<p>
+Note that these memory-barrier requirements do not replace the fundamental
+RCU requirement that a grace period wait for all pre-existing readers.
+On the contrary, the memory barriers called out in this section must operate in
+such a way as to <i>enforce</i> this fundamental requirement.
+Of course, different implementations enforce this requirement in different
+ways, but enforce it they must.
+
+<h3><a name="RCU Primitives Guaranteed to Execute Unconditionally">RCU Primitives Guaranteed to Execute Unconditionally</a></h3>
+
+<p>
+The common-case RCU primitives are unconditional.
+They are invoked, they do their job, and they return, with no possibility
+of error, and no need to retry.
+This is a key RCU design philosophy.
+
+<p>
+However, this philosophy is pragmatic rather than pigheaded.
+If someone comes up with a good justification for a particular conditional
+RCU primitive, it might well be implemented and added.
+After all, this guarantee was reverse-engineered, not premeditated.
+The unconditional nature of the RCU primitives was initially an
+accident of implementation, and later experience with conditional
+synchronization primitives caused me to elevate this
+accident to a guarantee.
+Therefore, the justification for adding a conditional primitive to
+RCU would need to be based on detailed and compelling use cases.
+
+<h3><a name="Guaranteed Read-to-Write Upgrade">Guaranteed Read-to-Write Upgrade</a></h3>
+
+<p>
+As far as RCU is concerned, it is always possible to carry out an
+update within an RCU read-side critical section.
+For example, that RCU read-side critical section might search for
+a given data element, and then might acquire the update-side
+spinlock in order to update that element, all while remaining
+in that RCU read-side critical section.
+Of course, it is necessary to exit the RCU read-side critical section
+before invoking <tt>synchronize_rcu()</tt>; however, this
+inconvenience can be avoided through use of the
+<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt> API members
+described later in this document.
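+
+<p>
+For example, the following sketch (with a hypothetical <tt>key</tt>
+match and a hypothetical <tt>-&gt;data</tt> field) upgrades a lookup
+to an update while remaining in the RCU read-side critical section:
+
+<blockquote>
+<pre>
+ 1 rcu_read_lock();
+ 2 p = rcu_dereference(gp);
+ 3 if (p &amp;&amp; p-&gt;key == key) {
+ 4   spin_lock(&amp;gp_lock); /* Upgrade to update side. */
+ 5   /* Real code might need to revalidate p here. */
+ 6   p-&gt;data = new_data;
+ 7   spin_unlock(&amp;gp_lock);
+ 8 }
+ 9 rcu_read_unlock();
+</pre>
+</blockquote>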
+
+<p>@@QQ@@
+But how does the upgrade-to-write operation exclude other readers?
+<p>@@QQA@@
+It doesn't, just like normal RCU updates, which also do not exclude
+RCU readers.
+<p>@@QQE@@
+
+<p>
+This guarantee allows lookup code to be shared between read-side
+and update-side code, and was premeditated, appearing in the earliest
+DYNIX/ptx RCU documentation.
+
+<h2><a name="Fundamental Non-Requirements">Fundamental Non-Requirements</a></h2>
+
+<p>
+RCU provides extremely lightweight readers, and its read-side guarantees,
+though quite useful, are correspondingly lightweight.
+It is therefore all too easy to assume that RCU is guaranteeing more
+than it really is.
+Of course, the list of things that RCU does not guarantee is infinitely
+long; however, the following sections list a few non-guarantees that
+have caused confusion.
+Except where otherwise noted, these non-guarantees were premeditated.
+
+<ol>
+<li> <a href="#Readers Impose Minimal Ordering">
+ Readers Impose Minimal Ordering</a>
+<li> <a href="#Readers Do Not Exclude Updaters">
+ Readers Do Not Exclude Updaters</a>
+<li> <a href="#Updaters Only Wait For Old Readers">
+ Updaters Only Wait For Old Readers</a>
+<li> <a href="#Grace Periods Don't Partition Read-Side Critical Sections">
+ Grace Periods Don't Partition Read-Side Critical Sections</a>
+<li> <a href="#Read-Side Critical Sections Don't Partition Grace Periods">
+ Read-Side Critical Sections Don't Partition Grace Periods</a>
+<li> <a href="#Disabling Preemption Does Not Block Grace Periods">
+ Disabling Preemption Does Not Block Grace Periods</a>
+</ol>
+
+<h3><a name="Readers Impose Minimal Ordering">Readers Impose Minimal Ordering</a></h3>
+
+<p>
+Reader-side markers such as <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> provide absolutely no ordering guarantees
+except through their interaction with the grace-period APIs such as
+<tt>synchronize_rcu()</tt>.
+To see this, consider the following pair of threads:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(x, 1);
+ 5 rcu_read_unlock();
+ 6 rcu_read_lock();
+ 7 WRITE_ONCE(y, 1);
+ 8 rcu_read_unlock();
+ 9 }
+10
+11 void thread1(void)
+12 {
+13 rcu_read_lock();
+14 r1 = READ_ONCE(y);
+15 rcu_read_unlock();
+16 rcu_read_lock();
+17 r2 = READ_ONCE(x);
+18 rcu_read_unlock();
+19 }
+</pre>
+</blockquote>
+
+<p>
+After <tt>thread0()</tt> and <tt>thread1()</tt> execute
+concurrently, it is quite possible to have
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 0)
+</pre>
+</blockquote>
+
+(that is, <tt>y</tt> appears to have been assigned before <tt>x</tt>),
+which would not be possible if <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> had much in the way of ordering
+properties.
+But they do not, so the CPU is within its rights
+to do significant reordering.
+This is by design: Any significant ordering constraints would slow down
+these fast-path APIs.
+
+<p>@@QQ@@
+Can't the compiler also reorder this code?
+<p>@@QQA@@
+No, the volatile casts in <tt>READ_ONCE()</tt> and
+<tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
+this particular case.
+<p>@@QQE@@
+
+<h3><a name="Readers Do Not Exclude Updaters">Readers Do Not Exclude Updaters</a></h3>
+
+<p>
+Neither <tt>rcu_read_lock()</tt> nor <tt>rcu_read_unlock()</tt>
+exclude updates.
+All they do is to prevent grace periods from ending.
+The following example illustrates this:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 r1 = READ_ONCE(y);
+ 5 if (r1) {
+ 6 do_something_with_nonzero_x();
+ 7 r2 = READ_ONCE(x);
+ 8 WARN_ON(!r2); /* BUG!!! */
+ 9 }
+10 rcu_read_unlock();
+11 }
+12
+13 void thread1(void)
+14 {
+15 spin_lock(&amp;my_lock);
+16 WRITE_ONCE(x, 1);
+17 WRITE_ONCE(y, 1);
+18 spin_unlock(&amp;my_lock);
+19 }
+</pre>
+</blockquote>
+
+<p>
+If the <tt>thread0()</tt> function's <tt>rcu_read_lock()</tt>
+excluded the <tt>thread1()</tt> function's update,
+the <tt>WARN_ON()</tt> could never fire.
+But the fact is that <tt>rcu_read_lock()</tt> does not exclude
+much of anything aside from subsequent grace periods, of which
+<tt>thread1()</tt> has none, so the
+<tt>WARN_ON()</tt> can and does fire.
+
+<h3><a name="Updaters Only Wait For Old Readers">Updaters Only Wait For Old Readers</a></h3>
+
+<p>
+It might be tempting to assume that after <tt>synchronize_rcu()</tt>
+completes, there are no readers executing.
+This temptation must be avoided because
+new readers can start immediately after <tt>synchronize_rcu()</tt>
+starts, and <tt>synchronize_rcu()</tt> is under no
+obligation to wait for these new readers.
+
+<p>@@QQ@@
+Suppose that <tt>synchronize_rcu()</tt> did wait until all readers had completed.
+Would the updater be able to rely on this?
+<p>@@QQA@@
+No.
+Even if <tt>synchronize_rcu()</tt> were to wait until
+all readers had completed, a new reader might start immediately after
+<tt>synchronize_rcu()</tt> completed.
+Therefore, the code following
+<tt>synchronize_rcu()</tt> cannot rely on there being no readers
+in any case.
+<p>@@QQE@@
+
+<h3><a name="Grace Periods Don't Partition Read-Side Critical Sections">
+Grace Periods Don't Partition Read-Side Critical Sections</a></h3>
+
+<p>
+It is tempting to assume that if any part of one RCU read-side critical
+section precedes a given grace period, and if any part of another RCU
+read-side critical section follows that same grace period, then all of
+the first RCU read-side critical section must precede all of the second.
+However, this just isn't the case: A single grace period does not
+partition the set of RCU read-side critical sections.
+An example of this situation can be illustrated as follows, where
+<tt>a</tt>, <tt>b</tt>, and <tt>c</tt> are initially all zero:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(a, 1);
+ 5 WRITE_ONCE(b, 1);
+ 6 rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11 r1 = READ_ONCE(a);
+12 synchronize_rcu();
+13 WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18 rcu_read_lock();
+19 r2 = READ_ONCE(b);
+20 r3 = READ_ONCE(c);
+21 rcu_read_unlock();
+22 }
+</pre>
+</blockquote>
+
+<p>
+It turns out that the outcome:
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 0 &amp;&amp; r3 == 1)
+</pre>
+</blockquote>
+
+is entirely possible.
+The following figure shows how this can happen, with each circled
+<tt>QS</tt> indicating the point at which RCU recorded a
+<i>quiescent state</i> for each thread, that is, a state in which
+RCU knows that the thread cannot be in the midst of an RCU read-side
+critical section that started before the current grace period:
+
+<p><img src="GPpartitionReaders1.svg" alt="GPpartitionReaders1.svg" width="60%"></p>
+
+<p>
+If it is necessary to partition RCU read-side critical sections in this
+manner, it is necessary to use two grace periods, where the first
+grace period is known to end before the second grace period starts:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(a, 1);
+ 5 WRITE_ONCE(b, 1);
+ 6 rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11 r1 = READ_ONCE(a);
+12 synchronize_rcu();
+13 WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18 r2 = READ_ONCE(c);
+19 synchronize_rcu();
+20 WRITE_ONCE(d, 1);
+21 }
+22
+23 void thread3(void)
+24 {
+25 rcu_read_lock();
+26 r3 = READ_ONCE(b);
+27 r4 = READ_ONCE(d);
+28 rcu_read_unlock();
+29 }
+</pre>
+</blockquote>
+
+<p>
+Here, if <tt>(r1 == 1)</tt>, then
+<tt>thread0()</tt>'s write to <tt>b</tt> must happen
+before the end of <tt>thread1()</tt>'s grace period.
+If in addition <tt>(r4 == 1)</tt>, then
+<tt>thread3()</tt>'s read from <tt>b</tt> must happen
+after the beginning of <tt>thread2()</tt>'s grace period.
+If it is also the case that <tt>(r2 == 1)</tt>, then the
+end of <tt>thread1()</tt>'s grace period must precede the
+beginning of <tt>thread2()</tt>'s grace period.
+This means that the two RCU read-side critical sections cannot overlap,
+guaranteeing that <tt>(r3 == 1)</tt>.
+As a result, the outcome:
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 0 &amp;&amp; r4 == 1)
+</pre>
+</blockquote>
+
+cannot happen.
+
+<p>
+This non-requirement was also non-premeditated, but became apparent
+when studying RCU's interaction with memory ordering.
+
+<h3><a name="Read-Side Critical Sections Don't Partition Grace Periods">
+Read-Side Critical Sections Don't Partition Grace Periods</a></h3>
+
+<p>
+It is also tempting to assume that if an RCU read-side critical section
+happens between a pair of grace periods, then those grace periods cannot
+overlap.
+However, this temptation leads nowhere good, as can be illustrated by
+the following, with all variables initially zero:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(a, 1);
+ 5 WRITE_ONCE(b, 1);
+ 6 rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11 r1 = READ_ONCE(a);
+12 synchronize_rcu();
+13 WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18 rcu_read_lock();
+19 WRITE_ONCE(d, 1);
+20 r2 = READ_ONCE(c);
+21 rcu_read_unlock();
+22 }
+23
+24 void thread3(void)
+25 {
+26 r3 = READ_ONCE(d);
+27 synchronize_rcu();
+28 WRITE_ONCE(e, 1);
+29 }
+30
+31 void thread4(void)
+32 {
+33 rcu_read_lock();
+34 r4 = READ_ONCE(b);
+35 r5 = READ_ONCE(e);
+36 rcu_read_unlock();
+37 }
+</pre>
+</blockquote>
+
+<p>
+In this case, the outcome:
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 1 &amp;&amp; r4 == 0 &amp;&amp; r5 == 1)
+</pre>
+</blockquote>
+
+is entirely possible, as illustrated below:
+
+<p><img src="ReadersPartitionGP1.svg" alt="ReadersPartitionGP1.svg" width="100%"></p>
+
+<p>
+Again, an RCU read-side critical section can overlap almost all of a
+given grace period, just so long as it does not overlap the entire
+grace period.
+As a result, an RCU read-side critical section cannot partition a pair
+of RCU grace periods.
+
+<p>@@QQ@@
+How long a sequence of grace periods, each separated by an RCU read-side
+critical section, would be required to partition the RCU read-side
+critical sections at the beginning and end of the chain?
+<p>@@QQA@@
+In theory, an infinite number.
+In practice, an unknown number that is sensitive to both implementation
+details and timing considerations.
+Therefore, even in practice, RCU users must abide by the theoretical rather
+than the practical answer.
+<p>@@QQE@@
+
+<h3><a name="Disabling Preemption Does Not Block Grace Periods">
+Disabling Preemption Does Not Block Grace Periods</a></h3>
+
+<p>
+There was a time when disabling preemption on any given CPU would block
+subsequent grace periods.
+However, this was an accident of implementation and is not a requirement.
+And in the current Linux-kernel implementation, disabling preemption
+on a given CPU in fact does not block grace periods, as Oleg Nesterov
+<a href="https://lkml.kernel.org/g/20150614193825.GA19582@redhat.com">demonstrated</a>.
+
+<p>
+If you need a preempt-disable region to block grace periods, you need to add
+<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>, for example
+as follows:
+
+<blockquote>
+<pre>
+ 1 preempt_disable();
+ 2 rcu_read_lock();
+ 3 do_something();
+ 4 rcu_read_unlock();
+ 5 preempt_enable();
+ 6
+ 7 /* Spinlocks implicitly disable preemption. */
+ 8 spin_lock(&amp;mylock);
+ 9 rcu_read_lock();
+10 do_something();
+11 rcu_read_unlock();
+12 spin_unlock(&amp;mylock);
+</pre>
+</blockquote>
+
+<p>
+In theory, you could enter the RCU read-side critical section first,
+but it is more efficient to keep the entire RCU read-side critical
+section contained in the preempt-disable region as shown above.
+Of course, RCU read-side critical sections that extend outside of
+preempt-disable regions will work correctly, but such critical sections
+can be preempted, which forces <tt>rcu_read_unlock()</tt> to do
+more work.
+And no, this is <i>not</i> an invitation to enclose all of your RCU
+read-side critical sections within preempt-disable regions, because
+doing so would degrade real-time response.
+
+<p>
+This non-requirement appeared with preemptible RCU.
+If you need a grace period that waits on non-preemptible code regions, use
+<a href="#Sched Flavor">RCU-sched</a>.
+
+<h2><a name="Parallelism Facts of Life">Parallelism Facts of Life</a></h2>
+
+<p>
+These parallelism facts of life are by no means specific to RCU, but
+the RCU implementation must abide by them.
+They therefore bear repeating:
+
+<ol>
+<li> Any CPU or task may be delayed at any time,
+ and any attempts to avoid these delays by disabling
+ preemption, interrupts, or whatever are completely futile.
+ This is most obvious in preemptible user-level
+ environments and in virtualized environments (where
+ a given guest OS's VCPUs can be preempted at any time by
+ the underlying hypervisor), but can also happen in bare-metal
+ environments due to ECC errors, NMIs, and other hardware
+ events.
+ Although a delay of more than about 20 seconds can result
+ in splats, the RCU implementation is obligated to use
+ algorithms that can tolerate extremely long delays, but where
+ &ldquo;extremely long&rdquo; is not long enough to allow
+ wrap-around when incrementing a 64-bit counter.
+<li> Both the compiler and the CPU can reorder memory accesses.
+ Where it matters, RCU must use compiler directives and
+ memory-barrier instructions to preserve ordering.
+<li> Conflicting writes to memory locations in any given cache line
+ will result in expensive cache misses.
+ Greater numbers of concurrent writes and more-frequent
+ concurrent writes will result in more dramatic slowdowns.
+ RCU is therefore obligated to use algorithms that have
+ sufficient locality to avoid significant performance and
+ scalability problems.
+<li> As a rough rule of thumb, only one CPU's worth of processing
+ may be carried out under the protection of any given exclusive
+ lock.
+ RCU must therefore use scalable locking designs.
+<li> Counters are finite, especially on 32-bit systems.
+	RCU's use of counters must therefore tolerate counter wrap,
+	or be designed such that counter wrap would take way more
+	time than a single system is likely to run
+	(a wrap-tolerant comparison idiom is sketched at the end of
+	this section).
+ An uptime of ten years is quite possible, a runtime
+ of a century much less so.
+ As an example of the latter, RCU's dyntick-idle nesting counter
+ allows 54 bits for interrupt nesting level (this counter
+ is 64 bits even on a 32-bit system).
+ Overflowing this counter requires 2<sup>54</sup>
+ half-interrupts on a given CPU without that CPU ever going idle.
+ If a half-interrupt happened every microsecond, it would take
+ 570 years of runtime to overflow this counter, which is currently
+ believed to be an acceptably long time.
+<li> Linux systems can have thousands of CPUs running a single
+ Linux kernel in a single shared-memory environment.
+ RCU must therefore pay close attention to high-end scalability.
+</ol>
+
+<p>
+This last parallelism fact of life means that RCU must pay special
+attention to the preceding facts of life.
+The idea that Linux might scale to systems with thousands of CPUs would
+have been met with some skepticism in the 1990s, but these requirements
+would otherwise have been unsurprising, even in the early 1990s.
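+
+<p>
+Returning to counter wrap: the usual way to tolerate it is to compare
+counters using unsigned arithmetic, in the style of the Linux kernel's
+<tt>ULONG_CMP_GE()</tt> macro, shown below in simplified form:
+
+<blockquote>
+<pre>
+ 1 /* Is "a" at or after "b", even if the counter has wrapped?
+ 2 * This works as long as the two values are within
+ 3 * ULONG_MAX / 2 of each other. */
+ 4 #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 &gt;= (a) - (b))
+</pre>
+</blockquote>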
+
+<h2><a name="Quality-of-Implementation Requirements">Quality-of-Implementation Requirements</a></h2>
+
+<p>
+These sections list quality-of-implementation requirements.
+Although an RCU implementation that ignores these requirements could
+still be used, it would likely be subject to limitations that would
+make it inappropriate for industrial-strength production use.
+Classes of quality-of-implementation requirements are as follows:
+
+<ol>
+<li> <a href="#Specialization">Specialization</a>
+<li> <a href="#Performance and Scalability">Performance and Scalability</a>
+<li> <a href="#Composability">Composability</a>
+<li> <a href="#Corner Cases">Corner Cases</a>
+</ol>
+
+<p>
+These classes are covered in the following sections.
+
+<h3><a name="Specialization">Specialization</a></h3>
+
+<p>
+RCU is and always has been intended primarily for read-mostly situations, as
+illustrated by the following figure.
+This means that RCU's read-side primitives are optimized, often at the
+expense of its update-side primitives.
+
+<p><img src="RCUApplicability.svg" alt="RCUApplicability.svg" width="70%"></p>
+
+<p>
+This focus on read-mostly situations means that RCU must interoperate
+with other synchronization primitives.
+For example, the <tt>add_gp()</tt> and <tt>remove_gp_synchronous()</tt>
+examples discussed earlier use RCU to protect readers and locking to
+coordinate updaters.
+However, the need extends much farther, requiring that a variety of
+synchronization primitives be legal within RCU read-side critical sections,
+including spinlocks, sequence locks, atomic operations, reference
+counters, and memory barriers.
+
+<p>@@QQ@@
+What about sleeping locks?
+<p>@@QQA@@
+These are forbidden within Linux-kernel RCU read-side critical sections
+because it is not legal to place a quiescent state (in this case,
+voluntary context switch) within an RCU read-side critical section.
+However, sleeping locks may be used within userspace RCU read-side critical
+sections, and also within Linux-kernel sleepable RCU
+<a href="#Sleepable RCU">(SRCU)</a>
+read-side critical sections.
+In addition, the -rt patchset turns spinlocks into sleeping locks so
+that the corresponding critical sections can be preempted, which
+also means that these sleeplockified spinlocks (but not other sleeping locks!)
+may be acquired within -rt-Linux-kernel RCU read-side critical sections.
+
+<p>
+Note that it <i>is</i> legal for a normal RCU read-side critical section
+to conditionally acquire a sleeping lock (as in <tt>mutex_trylock()</tt>),
+but only as long as it does not loop indefinitely attempting to
+conditionally acquire that sleeping lock.
+The key point is that things like <tt>mutex_trylock()</tt>
+either return with the mutex held, or return an error indication if
+the mutex was not immediately available.
+Either way, <tt>mutex_trylock()</tt> returns immediately without sleeping.
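+
+<p>
+A minimal sketch of this conditional-acquisition pattern, in which
+<tt>do_something_with()</tt> and the <tt>-&gt;lock</tt> field are
+hypothetical, might look as follows:
+
+<blockquote>
+<pre>
+ 1 rcu_read_lock();
+ 2 p = rcu_dereference(gp);
+ 3 if (p &amp;&amp; mutex_trylock(&amp;p-&gt;lock)) {
+ 4 do_something_with(p);
+ 5 mutex_unlock(&amp;p-&gt;lock);
+ 6 }
+ 7 rcu_read_unlock();
+</pre>
+</blockquote>
+
+<p>
+Because <tt>mutex_trylock()</tt> returns without sleeping whether or
+not it acquires the mutex, there is no quiescent state within the
+RCU read-side critical section.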
+<p>@@QQE@@
+
+<p>
+It often comes as a surprise that many algorithms do not require a
+consistent view of data, with network routing being the poster child.
+Internet routing algorithms take significant time to propagate
+updates, so that by the time an update arrives at a given system,
+that system has been sending network traffic the wrong way for
+a considerable length of time.
+Having a few threads continue to send traffic the wrong way for a
+few more milliseconds is clearly not a problem: In the worst case,
+TCP retransmissions will eventually get the data where it needs to go.
+In general, when tracking the state of the universe outside of the
+computer, some level of inconsistency must be tolerated due to
+speed-of-light delays if nothing else.
+
+<p>
+Furthermore, uncertainty about external state is inherent in many cases.
+For example, a pair of veterinarians might use heartbeat to determine
+whether or not a given cat was alive.
+But how long should they wait after the last heartbeat to decide that
+the cat is in fact dead?
+Waiting less than 400 milliseconds makes no sense because this would
+mean that a relaxed cat would be considered to cycle between death
+and life more than 100 times per minute.
+Moreover, just as with human beings, a cat's heart might stop for
+some period of time, so the exact wait period is a judgment call.
+One of our pair of veterinarians might wait 30 seconds before pronouncing
+the cat dead, while the other might insist on waiting a full minute.
+The two veterinarians would then disagree on the state of the cat during
+the final 30 seconds of the minute following the last heartbeat, as
+fancifully illustrated below:
+
+<p><img src="2013-08-is-it-dead.png" alt="2013-08-is-it-dead.png" width="431"></p>
+
+<p>
+Interestingly enough, this same situation applies to hardware.
+When push comes to shove, how do we tell whether or not some
+external server has failed?
+We send messages to it periodically, and declare it failed if we
+don't receive a response within a given period of time.
+Policy decisions can usually tolerate short
+periods of inconsistency.
+The policy was decided some time ago, and is only now being put into
+effect, so a few milliseconds of delay is normally inconsequential.
+
+<p>
+However, there are algorithms that absolutely must see consistent data.
+For example, the translation between a user-level SystemV semaphore
+ID to the corresponding in-kernel data structure is protected by RCU,
+but it is absolutely forbidden to update a semaphore that has just been
+removed.
+In the Linux kernel, this need for consistency is accommodated by acquiring
+spinlocks located in the in-kernel data structure from within
+the RCU read-side critical section, and this is indicated by the
+green box in the figure above.
+Many other techniques may be used, and are in fact used within the
+Linux kernel.
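+
+<p>
+The following sketch shows the usual idiom; the field and function
+names are illustrative rather than those of the actual System-V
+semaphore code:
+
+<blockquote>
+<pre>
+ 1 rcu_read_lock();
+ 2 sem = sem_lookup(id); /* Hypothetical RCU-protected lookup. */
+ 3 if (sem) {
+ 4 spin_lock(&amp;sem-&gt;lock);
+ 5 if (!sem-&gt;deleted) /* Set by updater before removal. */
+ 6 do_semaphore_op(sem);
+ 7 spin_unlock(&amp;sem-&gt;lock);
+ 8 }
+ 9 rcu_read_unlock();
+</pre>
+</blockquote>
+
+<p>
+The per-structure spinlock provides the needed consistency: any update
+that removes the semaphore must acquire that same lock and set the
+<tt>-&gt;deleted</tt> flag before proceeding.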
+
+<p>
+In short, RCU is not required to maintain consistency, and other
+mechanisms may be used in concert with RCU when consistency is required.
+RCU's specialization allows it to do its job extremely well, and its
+ability to interoperate with other synchronization mechanisms allows
+the right mix of synchronization tools to be used for a given job.
+
+<h3><a name="Performance and Scalability">Performance and Scalability</a></h3>
+
+<p>
+Energy efficiency is a critical component of performance today,
+and Linux-kernel RCU implementations must therefore avoid unnecessarily
+awakening idle CPUs.
+I cannot claim that this requirement was premeditated.
+In fact, I learned of it during a telephone conversation in which I
+was given &ldquo;frank and open&rdquo; feedback on the importance
+of energy efficiency in battery-powered systems and on specific
+energy-efficiency shortcomings of the Linux-kernel RCU implementation.
+In my experience, the battery-powered embedded community will consider
+any unnecessary wakeups to be extremely unfriendly acts.
+So much so that mere Linux-kernel-mailing-list posts are
+insufficient to vent their ire.
+
+<p>
+Memory consumption is not particularly important in most
+situations, and has become decreasingly
+so as memory sizes have expanded and memory
+costs have plummeted.
+However, as I learned from Matt Mackall's
+<a href="http://elinux.org/Linux_Tiny-FAQ">bloatwatch</a>
+efforts, memory footprint is critically important on single-CPU systems with
+non-preemptible (<tt>CONFIG_PREEMPT=n</tt>) kernels, and thus
+<a href="https://lkml.kernel.org/g/20090113221724.GA15307@linux.vnet.ibm.com">tiny RCU</a>
+was born.
+Josh Triplett has since taken over the small-memory banner with his
+<a href="https://tiny.wiki.kernel.org/">Linux kernel tinification</a>
+project, which resulted in
+<a href="#Sleepable RCU">SRCU</a>
+becoming optional for those kernels not needing it.
+
+<p>
+The remaining performance requirements are, for the most part,
+unsurprising.
+For example, in keeping with RCU's read-side specialization,
+<tt>rcu_dereference()</tt> should have negligible overhead (for
+example, suppression of a few minor compiler optimizations).
+Similarly, in non-preemptible environments, <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> should have exactly zero overhead.
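+
+<p>
+Roughly speaking, in such environments the read-side markers can
+reduce to nothing more than compiler directives, as in the following
+simplified sketch, which ignores lockdep and debugging logic:
+
+<blockquote>
+<pre>
+ 1 static inline void rcu_read_lock(void)
+ 2 {
+ 3 preempt_disable(); /* Just barrier() if CONFIG_PREEMPT=n. */
+ 4 }
+ 5
+ 6 static inline void rcu_read_unlock(void)
+ 7 {
+ 8 preempt_enable(); /* Likewise generates no code. */
+ 9 }
+</pre>
+</blockquote>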
+
+<p>
+In preemptible environments, in the case where the RCU read-side
+critical section was not preempted (as will be the case for the
+highest-priority real-time process), <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> should have minimal overhead.
+In particular, they should not contain atomic read-modify-write
+operations, memory-barrier instructions, preemption disabling,
+interrupt disabling, or backwards branches.
+However, in the case where the RCU read-side critical section was preempted,
+<tt>rcu_read_unlock()</tt> may acquire spinlocks and disable interrupts.
+This is why it is better to nest an RCU read-side critical section
+within a preempt-disable region than vice versa, at least in cases
+where that critical section is short enough to avoid unduly degrading
+real-time latencies.
+
+<p>
+The <tt>synchronize_rcu()</tt> grace-period-wait primitive is
+optimized for throughput.
+It may therefore incur several milliseconds of latency in addition to
+the duration of the longest RCU read-side critical section.
+On the other hand, multiple concurrent invocations of
+<tt>synchronize_rcu()</tt> are required to use batching optimizations
+so that they can be satisfied by a single underlying grace-period-wait
+operation.
+For example, in the Linux kernel, it is not unusual for a single
+grace-period-wait operation to serve more than
+<a href="https://www.usenix.org/conference/2004-usenix-annual-technical-conference/making-rcu-safe-deep-sub-millisecond-response">1,000 separate invocations</a>
+of <tt>synchronize_rcu()</tt>, thus amortizing the per-invocation
+overhead down to nearly zero.
+However, the grace-period optimization is also required to avoid
+measurable degradation of real-time scheduling and interrupt latencies.
+
+<p>
+In some cases, the multi-millisecond <tt>synchronize_rcu()</tt>
+latencies are unacceptable.
+In these cases, <tt>synchronize_rcu_expedited()</tt> may be used
+instead, reducing the grace-period latency down to a few tens of
+microseconds on small systems, at least in cases where the RCU read-side
+critical sections are short.
+There are currently no special latency requirements for
+<tt>synchronize_rcu_expedited()</tt> on large systems, but,
+consistent with the empirical nature of the RCU specification,
+that is subject to change.
+However, there most definitely are scalability requirements:
+A storm of <tt>synchronize_rcu_expedited()</tt> invocations on 4096
+CPUs should at least make reasonable forward progress.
+In return for its shorter latencies, <tt>synchronize_rcu_expedited()</tt>
+is permitted to impose modest degradation of real-time latency
+on non-idle online CPUs.
+That said, it will likely be necessary to take further steps to reduce this
+degradation, hopefully to roughly that of a scheduling-clock interrupt.
+
+<p>
+There are a number of situations where even
+<tt>synchronize_rcu_expedited()</tt>'s reduced grace-period
+latency is unacceptable.
+In these situations, the asynchronous <tt>call_rcu()</tt> can be
+used in place of <tt>synchronize_rcu()</tt> as follows:
+
+<blockquote>
+<pre>
+ 1 struct foo {
+ 2 int a;
+ 3 int b;
+ 4 struct rcu_head rh;
+ 5 };
+ 6
+ 7 static void remove_gp_cb(struct rcu_head *rhp)
+ 8 {
+ 9 struct foo *p = container_of(rhp, struct foo, rh);
+10
+11 kfree(p);
+12 }
+13
+14 bool remove_gp_asynchronous(void)
+15 {
+16 struct foo *p;
+17
+18 spin_lock(&amp;gp_lock);
+19 p = rcu_access_pointer(gp);
+20 if (!p) {
+21 spin_unlock(&amp;gp_lock);
+22 return false;
+23 }
+24 rcu_assign_pointer(gp, NULL);
+25 call_rcu(&amp;p-&gt;rh, remove_gp_cb);
+26 spin_unlock(&amp;gp_lock);
+27 return true;
+28 }
+</pre>
+</blockquote>
+
+<p>
+A definition of <tt>struct foo</tt> is finally needed, and appears
+on lines&nbsp;1-5.
+The function <tt>remove_gp_cb()</tt> is passed to <tt>call_rcu()</tt>
+on line&nbsp;25, and will be invoked after the end of a subsequent
+grace period.
+This gets the same effect as <tt>remove_gp_synchronous()</tt>,
+but without forcing the updater to wait for a grace period to elapse.
+The <tt>call_rcu()</tt> function may be used in a number of
+situations where neither <tt>synchronize_rcu()</tt> nor
+<tt>synchronize_rcu_expedited()</tt> would be legal,
+including within preempt-disable code, <tt>local_bh_disable()</tt> code,
+interrupt-disable code, and interrupt handlers.
+However, even <tt>call_rcu()</tt> is illegal within NMI handlers.
+The callback function (<tt>remove_gp_cb()</tt> in this case) will be
+executed within a softirq (software interrupt) environment within the
+Linux kernel,
+either within a real softirq handler or under the protection
+of <tt>local_bh_disable()</tt>.
+In both the Linux kernel and userspace, it is bad practice to
+write an RCU callback function that takes too long.
+Long-running operations should be relegated to separate threads or
+(in the Linux kernel) workqueues.
+
+<p>@@QQ@@
+Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
+After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
+structure, which would interact badly with concurrent insertions.
+Doesn't this mean that <tt>rcu_dereference()</tt> is required?
+<p>@@QQA@@
+Presumably the <tt>gp_lock</tt> acquired on line&nbsp;18 excludes
+any changes, including any insertions that <tt>rcu_dereference()</tt>
+would protect against.
+Therefore, any insertions will be delayed until after <tt>gp_lock</tt>
+is released on line&nbsp;26, which in turn means that
+<tt>rcu_access_pointer()</tt> suffices.
+<p>@@QQE@@
+
+<p>
+However, all that <tt>remove_gp_cb()</tt> is doing is
+invoking <tt>kfree()</tt> on the data element.
+This is a common idiom, and is supported by <tt>kfree_rcu()</tt>,
+which allows &ldquo;fire and forget&rdquo; operation as shown below:
+
+<blockquote>
+<pre>
+ 1 struct foo {
+ 2 int a;
+ 3 int b;
+ 4 struct rcu_head rh;
+ 5 };
+ 6
+ 7 bool remove_gp_faf(void)
+ 8 {
+ 9 struct foo *p;
+10
+11 spin_lock(&amp;gp_lock);
+12 p = rcu_access_pointer(gp);
+13 if (!p) {
+14 spin_unlock(&amp;gp_lock);
+15 return false;
+16 }
+17 rcu_assign_pointer(gp, NULL);
+18 kfree_rcu(p, rh);
+19 spin_unlock(&amp;gp_lock);
+20 return true;
+21 }
+</pre>
+</blockquote>
+
+<p>
+Note that <tt>remove_gp_faf()</tt> simply invokes
+<tt>kfree_rcu()</tt> and proceeds, without any need to pay any
+further attention to the subsequent grace period and <tt>kfree()</tt>.
+It is permissible to invoke <tt>kfree_rcu()</tt> from the same
+environments as for <tt>call_rcu()</tt>.
+Interestingly enough, DYNIX/ptx had the equivalents of
+<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>, but not
+<tt>synchronize_rcu()</tt>.
+This was due to the fact that RCU was not heavily used within DYNIX/ptx,
+so the very few places that needed something like
+<tt>synchronize_rcu()</tt> simply open-coded it.
+
+<p>@@QQ@@
+Earlier it was claimed that <tt>call_rcu()</tt> and
+<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
+by readers.
+But how can that be correct, given that the invocation of the callback
+and the freeing of the memory (respectively) must still wait for
+a grace period to elapse?
+<p>@@QQA@@
+We could define things this way, but keep in mind that this sort of
+definition would say that updates in garbage-collected languages
+cannot complete until the next time the garbage collector runs,
+which does not seem at all reasonable.
+The key point is that in most cases, an updater using either
+<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
+next update as soon as it has invoked <tt>call_rcu()</tt> or
+<tt>kfree_rcu()</tt>, without having to wait for a subsequent
+grace period.
+<p>@@QQE@@
+
+<p>
+But what if the updater must wait for the completion of code to be
+executed after the end of the grace period, but has other tasks
+that can be carried out in the meantime?
+The polling-style <tt>get_state_synchronize_rcu()</tt> and
+<tt>cond_synchronize_rcu()</tt> functions may be used for this
+purpose, as shown below:
+
+<blockquote>
+<pre>
+ 1 bool remove_gp_poll(void)
+ 2 {
+ 3 struct foo *p;
+ 4 unsigned long s;
+ 5
+ 6 spin_lock(&amp;gp_lock);
+ 7 p = rcu_access_pointer(gp);
+ 8 if (!p) {
+ 9 spin_unlock(&amp;gp_lock);
+10 return false;
+11 }
+12 rcu_assign_pointer(gp, NULL);
+13 spin_unlock(&amp;gp_lock);
+14 s = get_state_synchronize_rcu();
+15 do_something_while_waiting();
+16 cond_synchronize_rcu(s);
+17 kfree(p);
+18 return true;
+19 }
+</pre>
+</blockquote>
+
+<p>
+On line&nbsp;14, <tt>get_state_synchronize_rcu()</tt> obtains a
+&ldquo;cookie&rdquo; from RCU,
+then line&nbsp;15 carries out other tasks,
+and finally, line&nbsp;16 returns immediately if a grace period has
+elapsed in the meantime, but otherwise waits as required.
+The need for <tt>get_state_synchronize_rcu()</tt> and
+<tt>cond_synchronize_rcu()</tt> has appeared quite recently,
+so it is too early to tell whether they will stand the test of time.
+
+<p>
+RCU thus provides a range of tools to allow updaters to strike the
+required tradeoff between latency, flexibility, and CPU overhead.
+
+<h3><a name="Composability">Composability</a></h3>
+
+<p>
+Composability has received much attention in recent years, perhaps in part
+due to the collision of multicore hardware with object-oriented techniques
+designed in single-threaded environments for single-threaded use.
+And in theory, RCU read-side critical sections may be composed, and in
+fact may be nested arbitrarily deeply.
+In practice, as with all real-world implementations of composable
+constructs, there are limitations.
+
+<p>
+Implementations of RCU for which <tt>rcu_read_lock()</tt>
+and <tt>rcu_read_unlock()</tt> generate no code, such as
+Linux-kernel RCU when <tt>CONFIG_PREEMPT=n</tt>, can be
+nested arbitrarily deeply.
+After all, there is no overhead.
+Except that if all these instances of <tt>rcu_read_lock()</tt>
+and <tt>rcu_read_unlock()</tt> are visible to the compiler,
+compilation will eventually fail due to exhausting memory,
+mass storage, or user patience, whichever comes first.
+If the nesting is not visible to the compiler, as is the case with
+mutually recursive functions each in its own translation unit,
+stack overflow will result.
+If the nesting takes the form of loops, either the control variable
+will overflow or (in the Linux kernel) you will get an RCU CPU stall warning.
+Nevertheless, this class of RCU implementations is one
+of the most composable constructs in existence.
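+
+<p>
+For example, the following pair of readers (with hypothetical names)
+composes correctly regardless of whether <tt>walk_inner()</tt> is
+also called from elsewhere, outside of any RCU read-side critical
+section:
+
+<blockquote>
+<pre>
+ 1 void walk_inner(void)
+ 2 {
+ 3 rcu_read_lock(); /* Might or might not be nested. */
+ 4 do_something();
+ 5 rcu_read_unlock();
+ 6 }
+ 7
+ 8 void walk_outer(void)
+ 9 {
+10 rcu_read_lock();
+11 walk_inner(); /* Nested RCU reader: perfectly legal. */
+12 rcu_read_unlock();
+13 }
+</pre>
+</blockquote>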
+
+<p>
+RCU implementations that explicitly track nesting depth
+are limited by the nesting-depth counter.
+For example, the Linux kernel's preemptible RCU limits nesting to
+<tt>INT_MAX</tt>.
+This should suffice for almost all practical purposes.
+That said, a consecutive pair of RCU read-side critical sections
+between which there is an operation that waits for a grace period
+cannot be enclosed in another RCU read-side critical section.
+This is because it is not legal to wait for a grace period within
+an RCU read-side critical section: To do so would result either
+in deadlock or
+in RCU implicitly splitting the enclosing RCU read-side critical
+section, neither of which is conducive to a long-lived and prosperous
+kernel.
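+
+<p>
+In other words, the following sketch illustrates the forbidden pattern:
+
+<blockquote>
+<pre>
+ 1 rcu_read_lock();
+ 2 do_first_thing();
+ 3 synchronize_rcu(); /* BUG!!! Grace-period wait in a reader. */
+ 4 do_second_thing();
+ 5 rcu_read_unlock();
+</pre>
+</blockquote>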
+
+<p>
+It is worth noting that RCU is not alone in limiting composability.
+For example, many transactional-memory implementations prohibit
+composing a pair of transactions separated by an irrevocable
+operation (for example, a network receive operation).
+For another example, lock-based critical sections can be composed
+surprisingly freely, but only if deadlock is avoided.
+
+<p>
+In short, although RCU read-side critical sections are highly composable,
+care is required in some situations, just as is the case for any other
+composable synchronization mechanism.
+
+<h3><a name="Corner Cases">Corner Cases</a></h3>
+
+<p>
+A given RCU workload might have an endless and intense stream of
+RCU read-side critical sections, perhaps even so intense that there
+was never a point in time during which there was not at least one
+RCU read-side critical section in flight.
+RCU cannot allow this situation to block grace periods: As long as
+all the RCU read-side critical sections are finite, grace periods
+must also be finite.
+
+<p>
+That said, preemptible RCU implementations could potentially result
+in RCU read-side critical sections being preempted for long durations,
+which has the effect of creating a long-duration RCU read-side
+critical section.
+This situation can arise only in heavily loaded systems, but systems using
+real-time priorities are of course more vulnerable.
+Therefore, RCU priority boosting is provided to help deal with this
+case.
+That said, the exact requirements on RCU priority boosting will likely
+evolve as more experience accumulates.
+
+<p>
+Other workloads might have very high update rates.
+Although one can argue that such workloads should instead use
+something other than RCU, the fact remains that RCU must
+handle such workloads gracefully.
+This requirement is another factor driving batching of grace periods,
+but it is also the driving force behind the checks for large numbers
+of queued RCU callbacks in the <tt>call_rcu()</tt> code path.
+Finally, high update rates should not delay RCU read-side critical
+sections, although some read-side delays can occur when using
+<tt>synchronize_rcu_expedited()</tt>, courtesy of this function's use
+of <tt>try_stop_cpus()</tt>.
+(In the future, <tt>synchronize_rcu_expedited()</tt> will be
+converted to use lighter-weight inter-processor interrupts (IPIs),
+but this will still disturb readers, though to a much smaller degree.)
+
+<p>
+Although all three of these corner cases were understood in the early
+1990s, a simple user-level test consisting of <tt>close(open(path))</tt>
+in a tight loop
+in the early 2000s suddenly provided a much deeper appreciation of the
+high-update-rate corner case.
+This test also motivated addition of some RCU code to react to high update
+rates, for example, if a given CPU finds itself with more than 10,000
+RCU callbacks queued, it will cause RCU to take evasive action by
+more aggressively starting grace periods and more aggressively forcing
+completion of grace-period processing.
+This evasive action causes the grace period to complete more quickly,
+but at the cost of restricting RCU's batching optimizations, thus
+increasing the CPU overhead incurred by that grace period.
+
+<h2><a name="Software-Engineering Requirements">
+Software-Engineering Requirements</a></h2>
+
+<p>
+Between Murphy's Law and &ldquo;To err is human&rdquo;, it is necessary to
+guard against mishaps and misuse:
+
+<ol>
+<li> It is all too easy to forget to use <tt>rcu_read_lock()</tt>
+ everywhere that it is needed, so kernels built with
+	<tt>CONFIG_PROVE_RCU=y</tt> will splat if
+ <tt>rcu_dereference()</tt> is used outside of an
+ RCU read-side critical section.
+ Update-side code can use <tt>rcu_dereference_protected()</tt>,
+ which takes a
+ <a href="https://lwn.net/Articles/371986/">lockdep expression</a>
+ to indicate what is providing the protection.
+ If the indicated protection is not provided, a lockdep splat
+ is emitted.
+
+ <p>
+ Code shared between readers and updaters can use
+ <tt>rcu_dereference_check()</tt>, which also takes a
+ lockdep expression, and emits a lockdep splat if neither
+	<tt>rcu_read_lock()</tt> nor the indicated protection
+	is in place (see the sketch following this list).
+ In addition, <tt>rcu_dereference_raw()</tt> is used in those
+ (hopefully rare) cases where the required protection cannot
+ be easily described.
+ Finally, <tt>rcu_read_lock_held()</tt> is provided to
+ allow a function to verify that it has been invoked within
+ an RCU read-side critical section.
+ I was made aware of this set of requirements shortly after Thomas
+ Gleixner audited a number of RCU uses.
+<li> A given function might wish to check for RCU-related preconditions
+ upon entry, before using any other RCU API.
+ The <tt>rcu_lockdep_assert()</tt> does this job,
+ asserting the expression in kernels having lockdep enabled
+ and doing nothing otherwise.
+<li> It is also easy to forget to use <tt>rcu_assign_pointer()</tt>
+ and <tt>rcu_dereference()</tt>, perhaps (incorrectly)
+ substituting a simple assignment.
+ To catch this sort of error, a given RCU-protected pointer may be
+ tagged with <tt>__rcu</tt>, after which running sparse
+ with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt> will complain
+ about simple-assignment accesses to that pointer.
+ Arnd Bergmann made me aware of this requirement, and also
+ supplied the needed
+ <a href="https://lwn.net/Articles/376011/">patch series</a>.
+<li> Kernels built with <tt>CONFIG_DEBUG_OBJECTS_RCU_HEAD=y</tt>
+ will splat if a data element is passed to <tt>call_rcu()</tt>
+ twice in a row, without a grace period in between.
+ (This error is similar to a double free.)
+ The corresponding <tt>rcu_head</tt> structures that are
+ dynamically allocated are automatically tracked, but
+ <tt>rcu_head</tt> structures allocated on the stack
+ must be initialized with <tt>init_rcu_head_on_stack()</tt>
+ and cleaned up with <tt>destroy_rcu_head_on_stack()</tt>.
+ Similarly, statically allocated non-stack <tt>rcu_head</tt>
+ structures must be initialized with <tt>init_rcu_head()</tt>
+ and cleaned up with <tt>destroy_rcu_head()</tt>.
+ Mathieu Desnoyers made me aware of this requirement, and also
+ supplied the needed
+ <a href="https://lkml.kernel.org/g/20100319013024.GA28456@Krystal">patch</a>.
+<li> An infinite loop in an RCU read-side critical section will
+ eventually trigger an RCU CPU stall warning splat, with
+ the duration of &ldquo;eventually&rdquo; being controlled by the
+ <tt>RCU_CPU_STALL_TIMEOUT</tt> <tt>Kconfig</tt> option, or,
+ alternatively, by the
+ <tt>rcupdate.rcu_cpu_stall_timeout</tt> boot/sysfs
+ parameter.
+ However, RCU is not obligated to produce this splat
+ unless there is a grace period waiting on that particular
+ RCU read-side critical section.
+ <p>
+ Some extreme workloads might intentionally delay
+ RCU grace periods, and systems running those workloads can
+ be booted with <tt>rcupdate.rcu_cpu_stall_suppress</tt>
+ to suppress the splats.
+ This kernel parameter may also be set via <tt>sysfs</tt>.
+ Furthermore, RCU CPU stall warnings are counter-productive
+ during sysrq dumps and during panics.
+ RCU therefore supplies the <tt>rcu_sysrq_start()</tt> and
+ <tt>rcu_sysrq_end()</tt> API members to be called before
+ and after long sysrq dumps.
+ RCU also supplies the <tt>rcu_panic()</tt> notifier that is
+ automatically invoked at the beginning of a panic to suppress
+ further RCU CPU stall warnings.
+
+ <p>
+ This requirement made itself known in the early 1990s, pretty
+ much the first time that it was necessary to debug a CPU stall.
+ That said, the initial implementation in DYNIX/ptx was quite
+ generic in comparison with that of Linux.
+<li> Although it would be very good to detect pointers leaking out
+ of RCU read-side critical sections, there is currently no
+ good way of doing this.
+ One complication is the need to distinguish between pointers
+ leaking and pointers that have been handed off from RCU to
+ some other synchronization mechanism, for example, reference
+ counting.
+<li> In kernels built with <tt>CONFIG_RCU_TRACE=y</tt>, RCU-related
+ information is provided via both debugfs and event tracing.
+<li> Open-coded use of <tt>rcu_assign_pointer()</tt> and
+ <tt>rcu_dereference()</tt> to create typical linked
+ data structures can be surprisingly error-prone.
+ Therefore, RCU-protected
+ <a href="https://lwn.net/Articles/609973/#RCU List APIs">linked lists</a>
+ and, more recently, RCU-protected
+ <a href="https://lwn.net/Articles/612100/">hash tables</a>
+ are available.
+ Many other special-purpose RCU-protected data structures are
+ available in the Linux kernel and the userspace RCU library.
+<li> Some linked structures are created at compile time, but still
+ require <tt>__rcu</tt> checking.
+ The <tt>RCU_POINTER_INITIALIZER()</tt> macro serves this
+ purpose.
+<li> It is not necessary to use <tt>rcu_assign_pointer()</tt>
+ when creating linked structures that are to be published via
+ a single external pointer.
+ The <tt>RCU_INIT_POINTER()</tt> macro is provided for
+ this task and also for assigning <tt>NULL</tt> pointers
+ at runtime.
+</ol>
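+
+<p>
+For example, given the <tt>gp</tt> pointer and <tt>gp_lock</tt> from
+the earlier examples, an accessor shared between readers and updaters
+might be sketched as follows (<tt>get_gp()</tt> is hypothetical):
+
+<blockquote>
+<pre>
+ 1 struct foo *get_gp(void)
+ 2 {
+ 3 return rcu_dereference_check(gp,
+ 4 lockdep_is_held(&amp;gp_lock));
+ 5 }
+</pre>
+</blockquote>
+
+<p>
+This emits a lockdep splat if <tt>get_gp()</tt> is invoked with neither
+<tt>rcu_read_lock()</tt> nor <tt>gp_lock</tt> held.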
+
+<p>
+This is not a hard-and-fast list: RCU's diagnostic capabilities will
+continue to be guided by the number and type of usage bugs found
+in real-world RCU usage.
+
+<h2><a name="Linux Kernel Complications">Linux Kernel Complications</a></h2>
+
+<p>
+The Linux kernel provides an interesting environment for all kinds of
+software, including RCU.
+Some of the relevant points of interest are as follows:
+
+<ol>
+<li> <a href="#Configuration">Configuration</a>.
+<li> <a href="#Firmware Interface">Firmware Interface</a>.
+<li> <a href="#Early Boot">Early Boot</a>.
+<li> <a href="#Interrupts and NMIs">
+ Interrupts and non-maskable interrupts (NMIs)</a>.
+<li> <a href="#Loadable Modules">Loadable Modules</a>.
+<li> <a href="#Hotplug CPU">Hotplug CPU</a>.
+<li> <a href="#Scheduler and RCU">Scheduler and RCU</a>.
+<li> <a href="#Tracing and RCU">Tracing and RCU</a>.
+<li> <a href="#Energy Efficiency">Energy Efficiency</a>.
+<li> <a href="#Memory Efficiency">Memory Efficiency</a>.
+<li> <a href="#Performance, Scalability, Response Time, and Reliability">
+ Performance, Scalability, Response Time, and Reliability</a>.
+</ol>
+
+<p>
+This list is probably incomplete, but it does give a feel for the
+most notable Linux-kernel complications.
+Each of the following sections covers one of the above topics.
+
+<h3><a name="Configuration">Configuration</a></h3>
+
+<p>
+RCU's goal is automatic configuration, so that almost nobody
+needs to worry about RCU's <tt>Kconfig</tt> options.
+And for almost all users, RCU does in fact work well
+&ldquo;out of the box.&rdquo;
+
+<p>
+However, there are specialized use cases that are handled by
+kernel boot parameters and <tt>Kconfig</tt> options.
+Unfortunately, the <tt>Kconfig</tt> system will explicitly ask users
+about new <tt>Kconfig</tt> options, which requires that almost all of them
+be hidden behind a <tt>CONFIG_RCU_EXPERT</tt> <tt>Kconfig</tt> option.
+
+<p>
+This all should be quite obvious, but the fact remains that
+Linus Torvalds recently had to
+<a href="https://lkml.kernel.org/g/CA+55aFy4wcCwaL4okTs8wXhGZ5h-ibecy_Meg9C4MNQrUnwMcg@mail.gmail.com">remind</a>
+me of this requirement.
+
+<h3><a name="Firmware Interface">Firmware Interface</a></h3>
+
+<p>
+In many cases, the kernel obtains information about the system from the
+firmware, and sometimes things are lost in translation.
+Or the translation is accurate, but the original message is bogus.
+
+<p>
+For example, some systems' firmware overreports the number of CPUs,
+sometimes by a large factor.
+If RCU naively believed the firmware, as it used to do,
+it would create too many per-CPU kthreads.
+Although the resulting system will still run correctly, the extra
+kthreads needlessly consume memory and can cause confusion
+when they show up in <tt>ps</tt> listings.
+
+<p>
+RCU must therefore wait for a given CPU to actually come online before
+it can allow itself to believe that the CPU actually exists.
+The resulting &ldquo;ghost CPUs&rdquo; (which are never going to
+come online) cause a number of
+<a href="https://paulmck.livejournal.com/37494.html">interesting complications</a>.
+
+<h3><a name="Early Boot">Early Boot</a></h3>
+
+<p>
+The Linux kernel's boot sequence is an interesting process,
+and RCU is used early, even before <tt>rcu_init()</tt>
+is invoked.
+In fact, a number of RCU's primitives can be used as soon as the
+initial task's <tt>task_struct</tt> is available and the
+boot CPU's per-CPU variables are set up.
+The read-side primitives (<tt>rcu_read_lock()</tt>,
+<tt>rcu_read_unlock()</tt>, <tt>rcu_dereference()</tt>,
+and <tt>rcu_access_pointer()</tt>) will operate normally very early on,
+as will <tt>rcu_assign_pointer()</tt>.
+
+<p>
+Although <tt>call_rcu()</tt> may be invoked at any
+time during boot, callbacks are not guaranteed to be invoked until after
+the scheduler is fully up and running.
+This delay in callback invocation is due to the fact that RCU does not
+invoke callbacks until it is fully initialized, and this full initialization
+cannot occur until after the scheduler has initialized itself to the
+point where RCU can spawn and run its kthreads.
+In theory, it would be possible to invoke callbacks earlier;
+however, this is not a panacea because there would be severe restrictions
+on what operations those callbacks could invoke.
+
+<p>
+Perhaps surprisingly, <tt>synchronize_rcu()</tt>,
+<a href="#Bottom-Half Flavor"><tt>synchronize_rcu_bh()</tt></a>
+(<a href="#Bottom-Half Flavor">discussed below</a>),
+and
+<a href="#Sched Flavor"><tt>synchronize_sched()</tt></a>
+will all operate normally
+during very early boot, the reason being that there is only one CPU
+and preemption is disabled.
+This means that a call to <tt>synchronize_rcu()</tt> (or friends)
+itself is a quiescent
+state and thus a grace period, so the early-boot implementation can
+be a no-op.
+
+<p>
+Both <tt>synchronize_rcu_bh()</tt> and <tt>synchronize_sched()</tt>
+continue to operate normally through the remainder of boot, courtesy
+of the fact that preemption is disabled across their RCU read-side
+critical sections and also courtesy of the fact that there is still
+only one CPU.
+However, once the scheduler starts initializing, preemption is enabled.
+There is still only a single CPU, but the fact that preemption is enabled
+means that the no-op implementation of <tt>synchronize_rcu()</tt> no
+longer works in <tt>CONFIG_PREEMPT=y</tt> kernels.
+Therefore, as soon as the scheduler starts initializing, the early-boot
+fastpath is disabled.
+This means that <tt>synchronize_rcu()</tt> switches to its runtime
+mode of operation where it posts callbacks, which in turn means that
+any call to <tt>synchronize_rcu()</tt> will block until the corresponding
+callback is invoked.
+Unfortunately, the callback cannot be invoked until RCU's runtime
+grace-period machinery is up and running, which cannot happen until
+the scheduler has initialized itself sufficiently to allow RCU's
+kthreads to be spawned.
+Therefore, invoking <tt>synchronize_rcu()</tt> during scheduler
+initialization can result in deadlock.
+
+<p>@@QQ@@
+So what happens with <tt>synchronize_rcu()</tt> during
+scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
+kernels?
+<p>@@QQA@@
+In <tt>CONFIG_PREEMPT=n</tt> kernels, <tt>synchronize_rcu()</tt>
+maps directly to <tt>synchronize_sched()</tt>.
+Therefore, <tt>synchronize_rcu()</tt> works normally throughout
+boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
+However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
+so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
+during scheduler initialization.
+<p>@@QQE@@
+
+<p>
+I learned of these boot-time requirements as a result of a series of
+system hangs.
+
+<h3><a name="Interrupts and NMIs">Interrupts and NMIs</a></h3>
+
+<p>
+The Linux kernel has interrupts, and RCU read-side critical sections are
+legal within interrupt handlers and within interrupt-disabled regions
+of code, as are invocations of <tt>call_rcu()</tt>.
+
+<p>
+Some Linux-kernel architectures can enter an interrupt handler from
+non-idle process context, and then just never leave it, instead stealthily
+transitioning back to process context.
+This trick is sometimes used to invoke system calls from inside the kernel.
+These &ldquo;half-interrupts&rdquo; mean that RCU has to be very careful
+about how it counts interrupt nesting levels.
+I learned of this requirement the hard way during a rewrite
+of RCU's dyntick-idle code.
+
+<p>
+The Linux kernel has non-maskable interrupts (NMIs), and
+RCU read-side critical sections are legal within NMI handlers.
+Thankfully, RCU update-side primitives, including
+<tt>call_rcu()</tt>, are prohibited within NMI handlers.
+
+<p>
+The name notwithstanding, some Linux-kernel architectures
+can have nested NMIs, which RCU must handle correctly.
+Andy Lutomirski
+<a href="https://lkml.kernel.org/g/CALCETrXLq1y7e_dKFPgou-FKHB6Pu-r8+t-6Ds+8=va7anBWDA@mail.gmail.com">surprised me</a>
+with this requirement;
+he also kindly surprised me with
+<a href="https://lkml.kernel.org/g/CALCETrXSY9JpW3uE6H8WYk81sg56qasA2aqmjMPsq5dOtzso=g@mail.gmail.com">an algorithm</a>
+that meets this requirement.
+
+<h3><a name="Loadable Modules">Loadable Modules</a></h3>
+
+<p>
+The Linux kernel has loadable modules, and these modules can
+also be unloaded.
+After a given module has been unloaded, any attempt to call
+one of its functions results in a segmentation fault.
+The module-unload functions must therefore cancel any
+delayed calls to loadable-module functions, for example,
+any outstanding <tt>mod_timer()</tt> must be dealt with
+via <tt>del_timer_sync()</tt> or similar.
+
+<p>
+Unfortunately, there is no way to cancel an RCU callback;
+once you invoke <tt>call_rcu()</tt>, the callback function is
+going to eventually be invoked, unless the system goes down first.
+Because it is normally considered socially irresponsible to crash the system
+in response to a module unload request, we need some other way
+to deal with in-flight RCU callbacks.
+
+<p>
+RCU therefore provides
+<tt><a href="https://lwn.net/Articles/217484/">rcu_barrier()</a></tt>,
+which waits until all in-flight RCU callbacks have been invoked.
+If a module uses <tt>call_rcu()</tt>, its exit function should therefore
+prevent any future invocation of <tt>call_rcu()</tt>, then invoke
+<tt>rcu_barrier()</tt>.
+In theory, the underlying module-unload code could invoke
+<tt>rcu_barrier()</tt> unconditionally, but in practice this would
+incur unacceptable latencies.
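+
+<p>
+A module-exit function might therefore be sketched as follows, where
+<tt>mymod_stop_new_callbacks()</tt> is a hypothetical placeholder for
+whatever prevents additional invocations of <tt>call_rcu()</tt>:
+
+<blockquote>
+<pre>
+ 1 static void __exit mymod_exit(void)
+ 2 {
+ 3 mymod_stop_new_callbacks(); /* No more call_rcu() after this. */
+ 4 rcu_barrier(); /* Wait for in-flight callbacks. */
+ 5 /* Only now is it safe to unload the callbacks' code. */
+ 6 }
+ 7 module_exit(mymod_exit);
+</pre>
+</blockquote>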
+
+<p>
+Nikita Danilov noted this requirement for an analogous filesystem-unmount
+situation, and Dipankar Sarma incorporated <tt>rcu_barrier()</tt> into RCU.
+The need for <tt>rcu_barrier()</tt> for module unloading became
+apparent later.
+
+<h3><a name="Hotplug CPU">Hotplug CPU</a></h3>
+
+<p>
+The Linux kernel supports CPU hotplug, which means that CPUs
+can come and go.
+It is of course illegal to use any RCU API member from an offline CPU.
+This requirement was present from day one in DYNIX/ptx, but
+on the other hand, the Linux kernel's CPU-hotplug implementation
+is &ldquo;interesting.&rdquo;
+
+<p>
+The Linux-kernel CPU-hotplug implementation has notifiers that
+are used to allow the various kernel subsystems (including RCU)
+to respond appropriately to a given CPU-hotplug operation.
+Most RCU operations may be invoked from CPU-hotplug notifiers,
+including even normal synchronous grace-period operations
+such as <tt>synchronize_rcu()</tt>.
+However, expedited grace-period operations such as
+<tt>synchronize_rcu_expedited()</tt> are not supported,
+due to the fact that current implementations block CPU-hotplug
+operations, which could result in deadlock.
+
+<p>
+In addition, all-callback-wait operations such as
+<tt>rcu_barrier()</tt> are also not supported, due to the
+fact that there are phases of CPU-hotplug operations where
+the outgoing CPU's callbacks will not be invoked until after
+the CPU-hotplug operation ends, which could also result in deadlock.
+
+<h3><a name="Scheduler and RCU">Scheduler and RCU</a></h3>
+
+<p>
+RCU depends on the scheduler, and the scheduler uses RCU to
+protect some of its data structures.
+This means the scheduler is forbidden from acquiring
+the runqueue locks and the priority-inheritance locks
+in the middle of an outermost RCU read-side critical section unless either
+(1)&nbsp;it releases them before exiting that same
+RCU read-side critical section, or
+(2)&nbsp;interrupts are disabled across
+that entire RCU read-side critical section.
+This same prohibition also applies (recursively!) to any lock that is acquired
+while holding any lock to which this prohibition applies.
+Adhering to this rule prevents preemptible RCU from invoking
+<tt>rcu_read_unlock_special()</tt> while either runqueue or
+priority-inheritance locks are held, thus avoiding deadlock.
+
+<p>
+Prior to v4.4, it was only necessary to disable preemption across
+RCU read-side critical sections that acquired scheduler locks.
+In v4.4, expedited grace periods started using IPIs, and these
+IPIs could force a <tt>rcu_read_unlock()</tt> to take the slowpath.
+Therefore, this expedited-grace-period change required disabling of
+interrupts, not just preemption.
+
+<p>
+For RCU's part, the preemptible-RCU <tt>rcu_read_unlock()</tt>
+implementation must be written carefully to avoid similar deadlocks.
+In particular, <tt>rcu_read_unlock()</tt> must tolerate an
+interrupt where the interrupt handler invokes both
+<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
+This possibility requires <tt>rcu_read_unlock()</tt> to use
+negative nesting levels to avoid destructive recursion via
+the interrupt handler's use of RCU.
+
+<p>
+This pair of mutual scheduler-RCU requirements came as a
+<a href="https://lwn.net/Articles/453002/">complete surprise</a>.
+
+<p>
+As noted above, RCU makes use of kthreads, and it is necessary to
+avoid excessive CPU-time accumulation by these kthreads.
+This requirement was no surprise, but RCU's violation of it
+when running context-switch-heavy workloads when built with
+<tt>CONFIG_NO_HZ_FULL=y</tt>
+<a href="http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf">did come as a surprise [PDF]</a>.
+RCU has made good progress towards meeting this requirement, even
+for context-switch-heavy <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
+but there is room for further improvement.
+
+<h3><a name="Tracing and RCU">Tracing and RCU</a></h3>
+
+<p>
+It is possible to use tracing on RCU code, but tracing itself
+uses RCU.
+For this reason, <tt>rcu_dereference_raw_notrace()</tt>
+is provided for use by tracing, which avoids the destructive
+recursion that could otherwise ensue.
+This API is also used by virtualization in some architectures,
+where RCU readers execute in environments in which tracing
+cannot be used.
+The tracing folks both located the requirement and provided the
+needed fix, so this surprise requirement was relatively painless.
+
+<h3><a name="Energy Efficiency">Energy Efficiency</a></h3>
+
+<p>
+Interrupting idle CPUs is considered socially unacceptable,
+especially by people with battery-powered embedded systems.
+RCU therefore conserves energy by detecting which CPUs are
+idle, including tracking CPUs that have been interrupted from idle.
+This is a large part of the energy-efficiency requirement,
+so I learned of this via an irate phone call.
+
+<p>
+Because RCU avoids interrupting idle CPUs, it is illegal to
+execute an RCU read-side critical section on an idle CPU.
+(Kernels built with <tt>CONFIG_PROVE_RCU=y</tt> will splat
+if you try it.)
+The <tt>RCU_NONIDLE()</tt> macro and <tt>_rcuidle</tt>
+event tracing are provided to work around this restriction.
+In addition, <tt>rcu_is_watching()</tt> may be used to
+test whether or not it is currently legal to run RCU read-side
+critical sections on this CPU.
+I learned of the need for diagnostics on the one hand
+and <tt>RCU_NONIDLE()</tt> on the other while inspecting
+idle-loop code.
+Steven Rostedt supplied <tt>_rcuidle</tt> event tracing,
+which is used quite heavily in the idle loop.
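+
+<p>
+For example, idle-loop code needing to execute an RCU-using statement
+might be sketched as follows, where <tt>trace_some_event()</tt> is a
+hypothetical tracepoint:
+
+<blockquote>
+<pre>
+ 1 /* Execute an RCU-using statement from within the idle loop. */
+ 2 RCU_NONIDLE(trace_some_event(arg));
+ 3
+ 4 /* Or first check whether RCU is watching this CPU. */
+ 5 if (rcu_is_watching())
+ 6 do_something_with_rcu();
+</pre>
+</blockquote>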
+
+<p>
+It is similarly socially unacceptable to interrupt a
+<tt>nohz_full</tt> CPU running in userspace.
+RCU must therefore track <tt>nohz_full</tt> userspace
+execution.
+And in
+<a href="https://lwn.net/Articles/558284/"><tt>CONFIG_NO_HZ_FULL_SYSIDLE=y</tt></a>
+kernels, RCU must separately track idle CPUs on the one hand and
+CPUs that are either idle or executing in userspace on the other.
+In both cases, RCU must be able to sample state at two points in
+time, and be able to determine whether or not some other CPU spent
+any time idle and/or executing in userspace.
+
+<p>
+These energy-efficiency requirements have proven quite difficult to
+understand and to meet; for example, there have been more than five
+clean-sheet rewrites of RCU's energy-efficiency code, the last of
+which was finally able to demonstrate
+<a href="http://www.rdrop.com/users/paulmck/realtime/paper/AMPenergy.2013.04.19a.pdf">real energy savings running on real hardware [PDF]</a>.
+As noted earlier,
+I learned of many of these requirements via angry phone calls:
+Flaming me on the Linux-kernel mailing list was apparently not
+sufficient to fully vent their ire at RCU's energy-efficiency bugs!
+
+<h3><a name="Memory Efficiency">Memory Efficiency</a></h3>
+
+<p>
+Although small-memory non-realtime systems can simply use Tiny RCU,
+code size is only one aspect of memory efficiency.
+Another aspect is the size of the <tt>rcu_head</tt> structure
+used by <tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>.
+Although this structure contains nothing more than a pair of pointers,
+it does appear in many RCU-protected data structures, including
+some that are size critical.
+The <tt>page</tt> structure is a case in point, as evidenced by
+the many occurrences of the <tt>union</tt> keyword within that structure.
+
+<p>
+This need for memory efficiency is one reason that RCU uses hand-crafted
+singly linked lists to track the <tt>rcu_head</tt> structures that
+are waiting for a grace period to elapse.
+It is also the reason why <tt>rcu_head</tt> structures do not contain
+debug information, such as fields tracking the file and line of the
+<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> that posted them.
+Although this information might appear in debug-only kernel builds at some
+point, in the meantime, the <tt>-&gt;func</tt> field will often provide
+the needed debug information.
+
+<p>
+However, in some cases, the need for memory efficiency leads to even
+more extreme measures.
+Returning to the <tt>page</tt> structure, the <tt>rcu_head</tt> field
+shares storage with a great many other structures that are used at
+various points in the corresponding page's lifetime.
+In order to correctly resolve certain
+<a href="https://lkml.kernel.org/g/1439976106-137226-1-git-send-email-kirill.shutemov@linux.intel.com">race conditions</a>,
+the Linux kernel's memory-management subsystem needs a particular bit
+to remain zero during all phases of grace-period processing,
+and that bit happens to map to the bottom bit of the
+<tt>rcu_head</tt> structure's <tt>-&gt;next</tt> field.
+RCU makes this guarantee as long as <tt>call_rcu()</tt>
+is used to post the callback, as opposed to <tt>kfree_rcu()</tt>
+or some future &ldquo;lazy&rdquo;
+variant of <tt>call_rcu()</tt> that might one day be created for
+energy-efficiency purposes.
+
+<h3><a name="Performance, Scalability, Response Time, and Reliability">
+Performance, Scalability, Response Time, and Reliability</a></h3>
+
+<p>
+Expanding on the
+<a href="#Performance and Scalability">earlier discussion</a>,
+RCU is used heavily by hot code paths in performance-critical
+portions of the Linux kernel's networking, security, virtualization,
+and scheduling code paths.
+RCU must therefore use efficient implementations, especially in its
+read-side primitives.
+To that end, it would be good if preemptible RCU's implementation
+of <tt>rcu_read_lock()</tt> could be inlined; however, doing
+this requires resolving <tt>#include</tt> issues with the
+<tt>task_struct</tt> structure.
+
+<p>
+The Linux kernel supports hardware configurations with up to
+4096 CPUs, which means that RCU must be extremely scalable.
+Algorithms that involve frequent acquisitions of global locks or
+frequent atomic operations on global variables simply cannot be
+tolerated within the RCU implementation.
+RCU therefore makes heavy use of a combining tree based on the
+<tt>rcu_node</tt> structure.
+RCU is required to tolerate all CPUs continuously invoking any
+combination of RCU's runtime primitives with minimal per-operation
+overhead.
+In fact, in many cases, increasing load must <i>decrease</i> the
+per-operation overhead, witness the batching optimizations for
+<tt>synchronize_rcu()</tt>, <tt>call_rcu()</tt>,
+<tt>synchronize_rcu_expedited()</tt>, and <tt>rcu_barrier()</tt>.
+As a general rule, RCU must cheerfully accept whatever the
+rest of the Linux kernel decides to throw at it.
+
+<p>
+The Linux kernel is used for real-time workloads, especially
+in conjunction with the
+<a href="https://rt.wiki.kernel.org/index.php/Main_Page">-rt patchset</a>.
+The real-time-latency response requirements are such that the
+traditional approach of disabling preemption across RCU
+read-side critical sections is inappropriate.
+Kernels built with <tt>CONFIG_PREEMPT=y</tt> therefore
+use an RCU implementation that allows RCU read-side critical
+sections to be preempted.
+This requirement made its presence known after users made it
+clear that an earlier
+<a href="https://lwn.net/Articles/107930/">real-time patch</a>
+did not meet their needs, in conjunction with some
+<a href="https://lkml.kernel.org/g/20050318002026.GA2693@us.ibm.com">RCU issues</a>
+encountered by a very early version of the -rt patchset.
+
+<p>
+In addition, RCU must make do with a sub-100-microsecond real-time latency
+budget.
+In fact, on smaller systems with the -rt patchset, the Linux kernel
+provides sub-20-microsecond real-time latencies for the whole kernel,
+including RCU.
+RCU's scalability and latency must therefore be sufficient for
+these sorts of configurations.
+To my surprise, the sub-100-microsecond real-time latency budget
+<a href="http://www.rdrop.com/users/paulmck/realtime/paper/bigrt.2013.01.31a.LCA.pdf">
+applies to even the largest systems [PDF]</a>,
+up to and including systems with 4096 CPUs.
+This real-time requirement motivated the grace-period kthread, which
+also simplified handling of a number of race conditions.
+
+<p>
+Finally, RCU's status as a synchronization primitive means that
+any RCU failure can result in arbitrary memory corruption that can be
+extremely difficult to debug.
+This means that RCU must be extremely reliable, which in
+practice also means that RCU must have an aggressive stress-test
+suite.
+This stress-test suite is called <tt>rcutorture</tt>.
+
+<p>
+Although the need for <tt>rcutorture</tt> was no surprise,
+the current immense popularity of the Linux kernel is posing
+interesting&mdash;and perhaps unprecedented&mdash;validation
+challenges.
+To see this, keep in mind that there are well over one billion
+instances of the Linux kernel running today, given Android
+smartphones, Linux-powered televisions, and servers.
+This number can be expected to increase sharply with the advent of
+the celebrated Internet of Things.
+
+<p>
+Suppose that RCU contains a race condition that manifests on average
+once per million years of runtime.
+This bug will be occurring about three times per <i>day</i> across
+the installed base.
+RCU could simply hide behind hardware error rates, given that no one
+should really expect their smartphone to last for a million years.
+However, anyone taking too much comfort from this thought should
+consider the fact that in most jurisdictions, a successful multi-year
+test of a given mechanism, which might include a Linux kernel,
+suffices for a number of types of safety-critical certifications.
+In fact, rumor has it that the Linux kernel is already being used
+in production for safety-critical applications.
+I don't know about you, but I would feel quite bad if a bug in RCU
+killed someone, which might explain my recent focus on validation
+and verification.
+
+<h2><a name="Other RCU Flavors">Other RCU Flavors</a></h2>
+
+<p>
+One of the more surprising things about RCU is that there are now
+no fewer than five <i>flavors</i>, or API families.
+In addition, the primary flavor that has been the sole focus up to
+this point has two different implementations, non-preemptible and
+preemptible.
+The other four flavors are listed below, with requirements for each
+described in a separate section.
+
+<ol>
+<li> <a href="#Bottom-Half Flavor">Bottom-Half Flavor</a>
+<li> <a href="#Sched Flavor">Sched Flavor</a>
+<li> <a href="#Sleepable RCU">Sleepable RCU</a>
+<li> <a href="#Tasks RCU">Tasks RCU</a>
+</ol>
+
+<h3><a name="Bottom-Half Flavor">Bottom-Half Flavor</a></h3>
+
+<p>
+The softirq-disable (AKA &ldquo;bottom-half&rdquo;,
+hence the &ldquo;_bh&rdquo; abbreviations)
+flavor of RCU, or <i>RCU-bh</i>, was developed by
+Dipankar Sarma to provide a flavor of RCU that could withstand the
+network-based denial-of-service attacks researched by Robert
+Olsson.
+These attacks placed so much networking load on the system
+that some of the CPUs never exited softirq execution,
+which in turn prevented those CPUs from ever executing a context switch,
+which, in the RCU implementation of that time, prevented grace periods
+from ever ending.
+The result was an out-of-memory condition and a system hang.
+
+<p>
+The solution was the creation of RCU-bh, which does
+<tt>local_bh_disable()</tt>
+across its read-side critical sections, and which uses the transition
+from one type of softirq processing to another as a quiescent state
+in addition to context switch, idle, user mode, and offline.
+This means that RCU-bh grace periods can complete even when some of
+the CPUs execute in softirq indefinitely, thus allowing algorithms
+based on RCU-bh to withstand network-based denial-of-service attacks.
+
+<p>
+Because
+<tt>rcu_read_lock_bh()</tt> and <tt>rcu_read_unlock_bh()</tt>
+disable and re-enable softirq handlers, any attempt to start a softirq
+handler during the
+RCU-bh read-side critical section will be deferred.
+In this case, <tt>rcu_read_unlock_bh()</tt>
+will invoke softirq processing, which can take considerable time.
+One can of course argue that this softirq overhead should be associated
+with the code following the RCU-bh read-side critical section rather
+than <tt>rcu_read_unlock_bh()</tt>, but the fact
+is that most profiling tools cannot be expected to make this sort
+of fine distinction.
+For example, suppose that a three-millisecond-long RCU-bh read-side
+critical section executes during a time of heavy networking load.
+There will very likely be an attempt to invoke at least one softirq
+handler during that three milliseconds, but any such invocation will
+be delayed until the time of the <tt>rcu_read_unlock_bh()</tt>.
+This can of course make it appear at first glance as if
+<tt>rcu_read_unlock_bh()</tt> was executing very slowly.
+
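+<p>
+For illustration, here is a minimal sketch of an RCU-bh read-side
+critical section, with <tt>gp</tt> and <tt>do_something_with()</tt>
+being hypothetical stand-ins for the reader's real data structure
+and real processing:
+
+<blockquote>
+<pre>
+ 1 rcu_read_lock_bh();
+ 2 p = rcu_dereference_bh(gp);
+ 3 if (p)
+ 4   do_something_with(p);
+ 5 rcu_read_unlock_bh();
+</pre>
+</blockquote>
+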
+<p>
+The
+<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-bh API</a>
+includes
+<tt>rcu_read_lock_bh()</tt>,
+<tt>rcu_read_unlock_bh()</tt>,
+<tt>rcu_dereference_bh()</tt>,
+<tt>rcu_dereference_bh_check()</tt>,
+<tt>synchronize_rcu_bh()</tt>,
+<tt>synchronize_rcu_bh_expedited()</tt>,
+<tt>call_rcu_bh()</tt>,
+<tt>rcu_barrier_bh()</tt>, and
+<tt>rcu_read_lock_bh_held()</tt>.
+
+<h3><a name="Sched Flavor">Sched Flavor</a></h3>
+
+<p>
+Before preemptible RCU, waiting for an RCU grace period had the
+side effect of also waiting for all pre-existing interrupt
+and NMI handlers.
+However, there are legitimate preemptible-RCU implementations that
+do not have this property, given that any point in the code outside
+of an RCU read-side critical section can be a quiescent state.
+Therefore, <i>RCU-sched</i> was created, which follows &ldquo;classic&rdquo;
+RCU in that an RCU-sched grace period waits for pre-existing
+interrupt and NMI handlers.
+In kernels built with <tt>CONFIG_PREEMPT=n</tt>, the RCU and RCU-sched
+APIs have identical implementations, while kernels built with
+<tt>CONFIG_PREEMPT=y</tt> provide a separate implementation for each.
+
+<p>
+Note well that in <tt>CONFIG_PREEMPT=y</tt> kernels,
+<tt>rcu_read_lock_sched()</tt> and <tt>rcu_read_unlock_sched()</tt>
+disable and re-enable preemption, respectively.
+This means that if there was a preemption attempt during the
+RCU-sched read-side critical section, <tt>rcu_read_unlock_sched()</tt>
+will enter the scheduler, with all the latency and overhead entailed.
+Just as with <tt>rcu_read_unlock_bh()</tt>, this can make it look
+as if <tt>rcu_read_unlock_sched()</tt> was executing very slowly.
+However, the highest-priority task won't be preempted, so that task
+will enjoy low-overhead <tt>rcu_read_unlock_sched()</tt> invocations.
+
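+<p>
+A minimal sketch of an RCU-sched read-side critical section follows,
+again with <tt>gp</tt> and <tt>do_something_with()</tt> being
+hypothetical stand-ins:
+
+<blockquote>
+<pre>
+ 1 rcu_read_lock_sched();
+ 2 p = rcu_dereference_sched(gp);
+ 3 if (p)
+ 4   do_something_with(p);
+ 5 rcu_read_unlock_sched();
+</pre>
+</blockquote>
+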
+<p>
+The
+<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-sched API</a>
+includes
+<tt>rcu_read_lock_sched()</tt>,
+<tt>rcu_read_unlock_sched()</tt>,
+<tt>rcu_read_lock_sched_notrace()</tt>,
+<tt>rcu_read_unlock_sched_notrace()</tt>,
+<tt>rcu_dereference_sched()</tt>,
+<tt>rcu_dereference_sched_check()</tt>,
+<tt>synchronize_sched()</tt>,
+<tt>synchronize_rcu_sched_expedited()</tt>,
+<tt>call_rcu_sched()</tt>,
+<tt>rcu_barrier_sched()</tt>, and
+<tt>rcu_read_lock_sched_held()</tt>.
+However, anything that disables preemption also marks an RCU-sched
+read-side critical section, including
+<tt>preempt_disable()</tt> and <tt>preempt_enable()</tt>,
+<tt>local_irq_save()</tt> and <tt>local_irq_restore()</tt>,
+and so on.
+
+<h3><a name="Sleepable RCU">Sleepable RCU</a></h3>
+
+<p>
+For well over a decade, someone saying &ldquo;I need to block within
+an RCU read-side critical section&rdquo; was a reliable indication
+that this someone did not understand RCU.
+After all, if you are always blocking in an RCU read-side critical
+section, you can probably afford to use a higher-overhead synchronization
+mechanism.
+However, that changed with the advent of the Linux kernel's notifiers,
+whose RCU read-side critical
+sections almost never sleep, but sometimes need to.
+This resulted in the introduction of
+<a href="https://lwn.net/Articles/202847/">sleepable RCU</a>,
+or <i>SRCU</i>.
+
+<p>
+SRCU allows different domains to be defined, with each such domain
+defined by an instance of an <tt>srcu_struct</tt> structure.
+A pointer to this structure must be passed in to each SRCU function,
+for example, <tt>synchronize_srcu(&amp;ss)</tt>, where
+<tt>ss</tt> is the <tt>srcu_struct</tt> structure.
+The key benefit of these domains is that a slow SRCU reader in one
+domain does not delay an SRCU grace period in some other domain.
+That said, one consequence of these domains is that read-side code
+must pass a &ldquo;cookie&rdquo; from <tt>srcu_read_lock()</tt>
+to <tt>srcu_read_unlock()</tt>, for example, as follows:
+
+<blockquote>
+<pre>
+ 1 int idx;
+ 2
+ 3 idx = srcu_read_lock(&amp;ss);
+ 4 do_something();
+ 5 srcu_read_unlock(&amp;ss, idx);
+</pre>
+</blockquote>
+
+<p>
+As noted above, it is legal to block within SRCU read-side critical
+sections; however, with great power comes great responsibility.
+If you block forever in one of a given domain's SRCU read-side critical
+sections, then that domain's grace periods will also be blocked forever.
+Of course, one good way to block forever is to deadlock, which can
+happen if any operation in a given domain's SRCU read-side critical
+section can block waiting, either directly or indirectly, for that domain's
+grace period to elapse.
+For example, this results in a self-deadlock:
+
+<blockquote>
+<pre>
+ 1 int idx;
+ 2
+ 3 idx = srcu_read_lock(&amp;ss);
+ 4 do_something();
+ 5 synchronize_srcu(&amp;ss);
+ 6 srcu_read_unlock(&amp;ss, idx);
+</pre>
+</blockquote>
+
+<p>
+However, if line&nbsp;5 instead acquired a mutex that was held across
+a <tt>synchronize_srcu()</tt> for domain <tt>ss</tt>,
+deadlock would still be possible.
+Furthermore, if line&nbsp;5 acquired a mutex that was held across
+a <tt>synchronize_srcu()</tt> for some other domain <tt>ss1</tt>,
+and if an <tt>ss1</tt>-domain SRCU read-side critical section
+acquired another mutex that was held across an <tt>ss</tt>-domain
+<tt>synchronize_srcu()</tt>,
+deadlock would again be possible.
+Such a deadlock cycle could extend across an arbitrarily large number
+of different SRCU domains.
+Again, with great power comes great responsibility.
+
+<p>
+Unlike the other RCU flavors, SRCU read-side critical sections can
+run on idle and even offline CPUs.
+This ability requires that <tt>srcu_read_lock()</tt> and
+<tt>srcu_read_unlock()</tt> contain memory barriers, which means
+that SRCU readers will run a bit slower than would RCU readers.
+It also motivates the <tt>smp_mb__after_srcu_read_unlock()</tt>
+API, which, in combination with <tt>srcu_read_unlock()</tt>,
+guarantees a full memory barrier.
+
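+<p>
+For example, the following sketch provides a full memory barrier
+immediately after the SRCU read-side critical section:
+
+<blockquote>
+<pre>
+ 1 idx = srcu_read_lock(&amp;ss);
+ 2 do_something();
+ 3 srcu_read_unlock(&amp;ss, idx);
+ 4 smp_mb__after_srcu_read_unlock(); /* Full barrier with above. */
+</pre>
+</blockquote>
+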
+<p>
+The
+<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">SRCU API</a>
+includes
+<tt>srcu_read_lock()</tt>,
+<tt>srcu_read_unlock()</tt>,
+<tt>srcu_dereference()</tt>,
+<tt>srcu_dereference_check()</tt>,
+<tt>synchronize_srcu()</tt>,
+<tt>synchronize_srcu_expedited()</tt>,
+<tt>call_srcu()</tt>,
+<tt>srcu_barrier()</tt>, and
+<tt>srcu_read_lock_held()</tt>.
+It also includes
+<tt>DEFINE_SRCU()</tt>,
+<tt>DEFINE_STATIC_SRCU()</tt>, and
+<tt>init_srcu_struct()</tt>
+APIs for defining and initializing <tt>srcu_struct</tt> structures.
+
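+<p>
+Putting the definition and update-side pieces together, a minimal
+sketch of an SRCU domain and a corresponding updater follows, with
+<tt>remove_element()</tt> and <tt>free_element()</tt> being
+hypothetical stand-ins for the real removal and reclamation code:
+
+<blockquote>
+<pre>
+ 1 DEFINE_STATIC_SRCU(ss);
+ 2
+ 3 void update_element(void)
+ 4 {
+ 5   remove_element();
+ 6   synchronize_srcu(&amp;ss);
+ 7   free_element();
+ 8 }
+</pre>
+</blockquote>
+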
+<h3><a name="Tasks RCU">Tasks RCU</a></h3>
+
+<p>
+Some forms of tracing use &ldquo;trampolines&rdquo; to handle the
+binary rewriting required to install different types of probes.
+It would be good to be able to free old trampolines, which sounds
+like a job for some form of RCU.
+However, because it is necessary to be able to install a trace
+anywhere in the code, it is not possible to use read-side markers
+such as <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
+In addition, it does not work to have these markers in the trampoline
+itself, because there would need to be instructions following
+<tt>rcu_read_unlock()</tt>.
+Although <tt>synchronize_rcu()</tt> would guarantee that execution
+reached the <tt>rcu_read_unlock()</tt>, it would not be able to
+guarantee that execution had completely left the trampoline.
+
+<p>
+The solution, in the form of
+<a href="https://lwn.net/Articles/607117/"><i>Tasks RCU</i></a>,
+is to have implicit
+read-side critical sections that are delimited by voluntary context
+switches, that is, calls to <tt>schedule()</tt>,
+<tt>cond_resched_rcu_qs()</tt>, and
+<tt>synchronize_rcu_tasks()</tt>.
+In addition, transitions to and from userspace execution also delimit
+tasks-RCU read-side critical sections.
+
+<p>
+The tasks-RCU API is quite compact, consisting only of
+<tt>call_rcu_tasks()</tt>,
+<tt>synchronize_rcu_tasks()</tt>, and
+<tt>rcu_barrier_tasks()</tt>.
+
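+<p>
+For example, a tracer might free an old trampoline as sketched below,
+with <tt>disconnect_trampoline()</tt> and <tt>free_trampoline()</tt>
+being hypothetical stand-ins for the real tracing code:
+
+<blockquote>
+<pre>
+ 1 disconnect_trampoline(tramp); /* No new entries into tramp. */
+ 2 synchronize_rcu_tasks();      /* Wait out current users. */
+ 3 free_trampoline(tramp);       /* Now safe to free. */
+</pre>
+</blockquote>
+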
+<h2><a name="Possible Future Changes">Possible Future Changes</a></h2>
+
+<p>
+One of the tricks that RCU uses to attain update-side scalability is
+to increase grace-period latency with increasing numbers of CPUs.
+If this becomes a serious problem, it will be necessary to rework the
+grace-period state machine so as to avoid the need for the additional
+latency.
+
+<p>
+Expedited grace periods scan the CPUs, so their latency and overhead
+increase with increasing numbers of CPUs.
+If this becomes a serious problem on large systems, it will be necessary
+to do some redesign to avoid this scalability problem.
+
+<p>
+RCU disables CPU hotplug in a few places, perhaps most notably in the
+expedited grace-period and <tt>rcu_barrier()</tt> operations.
+If there is a strong reason to use expedited grace periods in CPU-hotplug
+notifiers, it will be necessary to avoid disabling CPU hotplug.
+This would introduce some complexity, so there had better be a <i>very</i>
+good reason.
+
+<p>
+The tradeoff between grace-period latency on the one hand and interruptions
+of other CPUs on the other hand may need to be re-examined.
+The desire is of course for zero grace-period latency as well as zero
+interprocessor interrupts undertaken during an expedited grace period
+operation.
+While this ideal is unlikely to be achievable, it is quite possible that
+further improvements can be made.
+
+<p>
+The multiprocessor implementations of RCU use a combining tree that
+groups CPUs so as to reduce lock contention and increase cache locality.
+However, this combining tree does not spread its memory across NUMA
+nodes nor does it align the CPU groups with hardware features such
+as sockets or cores.
+Such spreading and alignment is currently believed to be unnecessary
+because the hotpath read-side primitives do not access the combining
+tree, nor does <tt>call_rcu()</tt> in the common case.
+If you believe that your architecture needs such spreading and alignment,
+then your architecture should also benefit from the
+<tt>rcutree.rcu_fanout_leaf</tt> boot parameter, which can be set
+to the number of CPUs in a socket, NUMA node, or whatever.
+If the number of CPUs is too large, use a fraction of the number of
+CPUs.
+If the number of CPUs is a large prime number, well, that certainly
+is an &ldquo;interesting&rdquo; architectural choice!
+More flexible arrangements might be considered, but only if
+<tt>rcutree.rcu_fanout_leaf</tt> has proven inadequate, and only
+if the inadequacy has been demonstrated by a carefully run and
+realistic system-level workload.
+
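+<p>
+For example, on a hypothetical system having 16 CPUs per socket, the
+following boot parameter would align each leaf <tt>rcu_node</tt>
+structure with a socket:
+
+<blockquote>
+<pre>
+ 1 rcutree.rcu_fanout_leaf=16
+</pre>
+</blockquote>
+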
+<p>
+Please note that arrangements that require RCU to remap CPU numbers will
+require extremely good demonstration of need and full exploration of
+alternatives.
+
+<p>
+There is an embarrassingly large number of flavors of RCU, and this
+number has been increasing over time.
+Perhaps it will be possible to combine some at some future date.
+
+<p>
+RCU's various kthreads are reasonably recent additions.
+It is quite likely that adjustments will be required to more gracefully
+handle extreme loads.
+It might also be necessary to be able to relate CPU utilization by
+RCU's kthreads and softirq handlers to the code that instigated this
+CPU utilization.
+For example, RCU callback overhead might be charged back to the
+originating <tt>call_rcu()</tt> instance, though probably not
+in production kernels.
+
+<h2><a name="Summary">Summary</a></h2>
+
+<p>
+This document has presented more than two decades' worth of RCU
+requirements.
+Given that the requirements keep changing, this will not be the last
+word on this subject, but at least it serves to get an important
+subset of the requirements set forth.
+
+<h2><a name="Acknowledgments">Acknowledgments</a></h2>
+
+I am grateful to Steven Rostedt, Lai Jiangshan, Ingo Molnar,
+Oleg Nesterov, Borislav Petkov, Peter Zijlstra, Boqun Feng, and
+Andy Lutomirski for their help in rendering
+this article human readable, and to Michelle Rankin for her support
+of this effort.
+Other contributions are acknowledged in the Linux kernel's git archive.
+The cartoon is copyright (c) 2013 by Melissa Broussard,
+and is provided
+under the terms of the Creative Commons Attribution-Share Alike 3.0
+United States license.
+
+<p>@@QQAL@@
+
+</body></html>
diff --git a/Documentation/RCU/Design/htmlqqz.sh b/Documentation/RCU/Design/htmlqqz.sh
new file mode 100755
index 000000000000..d354f069559b
--- /dev/null
+++ b/Documentation/RCU/Design/htmlqqz.sh
@@ -0,0 +1,108 @@
+#!/bin/sh
+#
+# Usage: sh htmlqqz.sh file
+#
+# Extracts and converts quick quizzes in a proto-HTML document file.htmlx.
+# Commands, all of which must be on a line by themselves:
+#
+# "<p>@@QQ@@": Start of a quick quiz.
+# "<p>@@QQA@@": Start of a quick-quiz answer.
+# "<p>@@QQE@@": End of a quick-quiz answer, and thus of the quick quiz.
+# "<p>@@QQAL@@": Place to put quick-quiz answer list.
+#
+# Places the result in file.html.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (c) 2013 Paul E. McKenney, IBM Corporation.
+
+fn=$1
+if test ! -r $fn.htmlx
+then
+ echo "Error: $fn.htmlx unreadable."
+ exit 1
+fi
+
+echo "<!-- DO NOT HAND EDIT. -->" > $fn.html
+echo "<!-- Instead, edit $fn.htmlx and run 'sh htmlqqz.sh $fn' -->" >> $fn.html
+awk < $fn.htmlx >> $fn.html '
+
+state == "" && $1 != "<p>@@QQ@@" && $1 != "<p>@@QQAL@@" {
+ print $0;
+ if ($0 ~ /^<p>@@QQ/)
+ print "Bad Quick Quiz command: " NR " (expected <p>@@QQ@@ or <p>@@QQAL@@)." > "/dev/stderr"
+ next;
+}
+
+state == "" && $1 == "<p>@@QQ@@" {
+ qqn++;
+ qqlineno = NR;
+ haveqq = 1;
+ state = "qq";
+ print "<p><a name=\"Quick Quiz " qqn "\"><b>Quick Quiz " qqn "</b>:</a>"
+ next;
+}
+
+state == "qq" && $1 != "<p>@@QQA@@" {
+ qq[qqn] = qq[qqn] $0 "\n";
+ print $0
+ if ($0 ~ /^<p>@@QQ/)
+ print "Bad Quick Quiz command: " NR ". (expected <p>@@QQA@@)" > "/dev/stderr"
+ next;
+}
+
+state == "qq" && $1 == "<p>@@QQA@@" {
+ state = "qqa";
+ print "<br><a href=\"#qq" qqn "answer\">Answer</a>"
+ next;
+}
+
+state == "qqa" && $1 != "<p>@@QQE@@" {
+ qqa[qqn] = qqa[qqn] $0 "\n";
+ if ($0 ~ /^<p>@@QQ/)
+ print "Bad Quick Quiz command: " NR " (expected <p>@@QQE@@)." > "/dev/stderr"
+ next;
+}
+
+state == "qqa" && $1 == "<p>@@QQE@@" {
+ state = "";
+ next;
+}
+
+state == "" && $1 == "<p>@@QQAL@@" {
+ haveqq = "";
+ print "<h3><a name=\"Answers to Quick Quizzes\">"
+ print "Answers to Quick Quizzes</a></h3>"
+ print "";
+ for (i = 1; i <= qqn; i++) {
+ print "<a name=\"qq" i "answer\"></a>"
+ print "<p><b>Quick Quiz " i "</b>:"
+ print qq[i];
+ print "";
+ print "</p><p><b>Answer</b>:"
+ print qqa[i];
+ print "";
+ print "</p><p><a href=\"#Quick%20Quiz%20" i "\"><b>Back to Quick Quiz " i "</b>.</a>"
+ print "";
+ }
+ next;
+}
+
+END {
+ if (state != "")
+ print "Unterminated Quick Quiz: " qqlineno "." > "/dev/stderr"
+ else if (haveqq)
+ print "Missing \"<p>@@QQAL@@\", no Quick Quiz." > "/dev/stderr"
+}'
diff --git a/Documentation/arm/keystone/Overview.txt b/Documentation/arm/keystone/Overview.txt
index f17bc4c9dff9..400c0c270d2e 100644
--- a/Documentation/arm/keystone/Overview.txt
+++ b/Documentation/arm/keystone/Overview.txt
@@ -49,24 +49,6 @@ specified through DTS. Following are the DTS used:-
The device tree documentation for the keystone machines are located at
Documentation/devicetree/bindings/arm/keystone/keystone.txt
-Known issues & workaround
--------------------------
-
-Some of the device drivers used on keystone are re-used from that from
-DaVinci and other TI SoCs. These device drivers may use clock APIs directly.
-Some of the keystone specific drivers such as netcp uses run time power
-management API instead to enable clock. As this API has limitations on
-keystone, following workaround is needed to boot Linux.
-
- Add 'clk_ignore_unused' to the bootargs env variable in u-boot. Otherwise
- clock frameworks will try to disable clocks that are unused and disable
- the hardware. This is because netcp related power domain and clock
- domains are enabled in u-boot as run time power management API currently
- doesn't enable clocks for netcp due to a limitation. This workaround is
- expected to be removed in the future when proper API support becomes
- available. Until then, this work around is needed.
-
-
Document Author
---------------
Murali Karicheri <m-karicheri2@ti.com>
diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt
index 2f6c6ff7161d..d8880ca30af4 100644
--- a/Documentation/block/null_blk.txt
+++ b/Documentation/block/null_blk.txt
@@ -70,3 +70,6 @@ use_per_node_hctx=[0/1]: Default: 0
parameter.
1: The multi-queue block layer is instantiated with a hardware dispatch
queue for each CPU node in the system.
+
+use_lightnvm=[0/1]: Default: 0
+ Register device with LightNVM. Requires blk-mq to be used.
diff --git a/Documentation/devicetree/bindings/dma/ti-edma.txt b/Documentation/devicetree/bindings/dma/ti-edma.txt
index d3d0a4fb1c73..079b42a81d7c 100644
--- a/Documentation/devicetree/bindings/dma/ti-edma.txt
+++ b/Documentation/devicetree/bindings/dma/ti-edma.txt
@@ -22,8 +22,7 @@ Required properties:
Optional properties:
- ti,hwmods: Name of the hwmods associated to the eDMA CC
- ti,edma-memcpy-channels: List of channels allocated to be used for memcpy, iow
- these channels will be SW triggered channels. The list must
- contain 16 bits numbers, see example.
+ these channels will be SW triggered channels. See example.
- ti,edma-reserved-slot-ranges: PaRAM slot ranges which should not be used by
the driver, they are allocated to be used by for example the
DSP. See example.
@@ -56,10 +55,9 @@ edma: edma@49000000 {
ti,tptcs = <&edma_tptc0 7>, <&edma_tptc1 7>, <&edma_tptc2 0>;
/* Channel 20 and 21 is allocated for memcpy */
- ti,edma-memcpy-channels = /bits/ 16 <20 21>;
- /* The following PaRAM slots are reserved: 35-45 and 100-110 */
- ti,edma-reserved-slot-ranges = /bits/ 16 <35 10>,
- /bits/ 16 <100 10>;
+ ti,edma-memcpy-channels = <20 21>;
+ /* The following PaRAM slots are reserved: 35-44 and 100-109 */
+ ti,edma-reserved-slot-ranges = <35 10>, <100 10>;
};
edma_tptc0: tptc@49800000 {
diff --git a/Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt b/Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt
index f2455c50533d..120bc4971cf3 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt
@@ -11,6 +11,10 @@ Required properties:
0 = active high
1 = active low
+Optional properties:
+- little-endian : GPIO registers are used as little endian. If not
+ present registers are used as big endian by default.
+
Example:
gpio0: gpio@1100 {
diff --git a/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt b/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
index b9c32f6fd687..4357e498ef04 100644
--- a/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
+++ b/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
@@ -12,7 +12,7 @@ Each key is represented as a sub-node of "allwinner,sun4i-a10-lradc-keys":
Required subnode-properties:
- label: Descriptive name of the key.
- linux,code: Keycode to emit.
- - channel: Channel this key is attached to, mut be 0 or 1.
+ - channel: Channel this key is attached to, must be 0 or 1.
- voltage: Voltage in µV at lradc input when this key is pressed.
Example:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-nmi.txt
index d1c5cdabc3e0..81cd3692405e 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-nmi.txt
@@ -4,7 +4,7 @@ Allwinner Sunxi NMI Controller
Required properties:
- compatible : should be "allwinner,sun7i-a20-sc-nmi" or
- "allwinner,sun6i-a31-sc-nmi"
+ "allwinner,sun6i-a31-sc-nmi" or "allwinner,sun9i-a80-nmi"
- reg : Specifies base physical address and size of the registers.
- interrupt-controller : Identifies the node as an interrupt controller
- #interrupt-cells : Specifies the number of cells needed to encode an
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
index cc56021eb60b..5a1cb4bc3dfe 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
@@ -18,6 +18,7 @@ Main node required properties:
"arm,cortex-a9-gic"
"arm,gic-400"
"arm,pl390"
+ "arm,tc11mp-gic"
"brcm,brahma-b15-gic"
"qcom,msm-8660-qgic"
"qcom,msm-qgic2"
diff --git a/Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt b/Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt
new file mode 100644
index 000000000000..720f7c92e9a1
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt
@@ -0,0 +1,74 @@
+Hisilicon mbigen device tree bindings.
+=======================================
+
+Mbigen means: message-based interrupt generator.
+
+MBI is a kind of MSI interrupt used only by non-PCI devices.
+
+To reduce the number of wired interrupts connected to the GIC,
+Hisilicon designed the mbigen to collect and generate interrupts.
+
+Non-PCI devices can connect to the mbigen and generate
+interrupts by writing to the ITS register.
+
+The mbigen chip and the devices connected to it have the following properties:
+
+Mbigen main node required properties:
+-------------------------------------------
+- compatible: Should be "hisilicon,mbigen-v2"
+
+- reg: Specifies the base physical address and size of the Mbigen
+ registers.
+
+- interrupt-controller: Identifies the node as an interrupt controller
+
+- msi-parent: Specifies the MSI controller this mbigen uses.
+  For more detailed information, please refer to the generic msi-parent binding in
+  Documentation/devicetree/bindings/interrupt-controller/msi.txt.
+
+- num-pins: the total number of pins implemented in this Mbigen
+ instance.
+
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The value must be 2.
+
+  The 1st cell is the hardware pin number of the interrupt. This number
+  is local to each mbigen chip and in the range from 0 to the maximum
+  number of interrupts supported by the mbigen.
+
+ The 2nd cell is the interrupt trigger type.
+ The value of this cell should be:
+ 1: rising edge triggered
+ or
+ 4: high level triggered
+
+Examples:
+
+ mbigen_device_gmac:intc {
+ compatible = "hisilicon,mbigen-v2";
+ reg = <0x0 0xc0080000 0x0 0x10000>;
+ interrupt-controller;
+ msi-parent = <&its_dsa 0x40b1c>;
+ num-pins = <9>;
+ #interrupt-cells = <2>;
+ };
+
+Required properties for devices connected to the mbigen:
+----------------------------------------------------
+- interrupt-parent: Specifies the mbigen device node to which the
+  device is connected.
+
+- interrupts: Specifies the interrupt source.
+  For the specific meaning of each cell in this property, please refer
+  to the "#interrupt-cells" description mentioned above.
+
+Examples:
+ gmac0: ethernet@c2080000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0 0xc2080000 0 0x20000>,
+ <0 0xc0000000 0 0x1000>;
+ interrupt-parent = <&mbigen_device_gmac>;
+ interrupts = <656 1>,
+ <657 1>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/technologic,ts4800.txt b/Documentation/devicetree/bindings/interrupt-controller/technologic,ts4800.txt
new file mode 100644
index 000000000000..7f15f1b0325b
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/technologic,ts4800.txt
@@ -0,0 +1,16 @@
+TS-4800 FPGA interrupt controller
+
+TS-4800 FPGA has an internal interrupt controller. When one of the
+interrupts is triggered, the SoC is notified, usually using a GPIO as
+parent interrupt source.
+
+Required properties:
+- compatible: should be "technologic,ts4800-irqc"
+- interrupt-controller: identifies the node as an interrupt controller
+- reg: physical base address of the controller and length of memory mapped
+ region
+- #interrupt-cells: specifies the number of cells needed to encode an interrupt
+ source, should be 1.
+- interrupt-parent: phandle to the parent interrupt controller this one is
+ cascaded from
+- interrupts: specifies the interrupt line in the interrupt-parent controller
diff --git a/Documentation/devicetree/bindings/mtd/partition.txt b/Documentation/devicetree/bindings/mtd/partition.txt
index f1e2a02381a4..1c63e40659fc 100644
--- a/Documentation/devicetree/bindings/mtd/partition.txt
+++ b/Documentation/devicetree/bindings/mtd/partition.txt
@@ -6,7 +6,9 @@ used for what purposes, but which don't use an on-flash partition table such
as RedBoot.
The partition table should be a subnode of the mtd node and should be named
-'partitions'. Partitions are defined in subnodes of the partitions node.
+'partitions'. This node should have the following property:
+- compatible : (required) must be "fixed-partitions"
+Partitions are then defined in subnodes of the partitions node.
For backwards compatibility partitions as direct subnodes of the mtd device are
supported. This use is discouraged.
@@ -36,6 +38,7 @@ Examples:
flash@0 {
partitions {
+ compatible = "fixed-partitions";
#address-cells = <1>;
#size-cells = <1>;
@@ -53,6 +56,7 @@ flash@0 {
flash@1 {
partitions {
+ compatible = "fixed-partitions";
#address-cells = <1>;
#size-cells = <2>;
@@ -66,6 +70,7 @@ flash@1 {
flash@2 {
partitions {
+ compatible = "fixed-partitions";
#address-cells = <2>;
#size-cells = <2>;
diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt
index 9853f8e70966..28a4781ab6d7 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -40,18 +40,18 @@ Optional properties:
Slave Properties:
Required properties:
-- phy_id : Specifies slave phy id
- phy-mode : See ethernet.txt file in the same directory
Optional properties:
- dual_emac_res_vlan : Specifies VID to be used to segregate the ports
- mac-address : See ethernet.txt file in the same directory
+- phy_id : Specifies slave phy id
- phy-handle : See ethernet.txt file in the same directory
Slave sub-nodes:
- fixed-link : See fixed-link.txt file in the same directory
- Either the properties phy_id and phy-mode,
- or the sub-node fixed-link can be specified
+ Either the property phy_id, or the sub-node
+ fixed-link can be specified
Note: "ti,hwmods" field is used to fetch the base address and irq
resources from TI, omap hwmod data base during device registration.
diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
index f5a8ca29aff0..aeea50c84e92 100644
--- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
+++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
@@ -8,6 +8,11 @@ Required properties:
- phy-mode: See ethernet.txt file in the same directory
- clocks: a pointer to the reference clock for this device.
+Optional properties:
+- tx-csum-limit: maximum MTU, in bytes, for which the port supports TX
+  checksumming. If not specified, the default is 1600B for
+  "marvell,armada-370-neta" and 9800B for others.
+
Example:
ethernet@d0070000 {
@@ -15,6 +20,7 @@ ethernet@d0070000 {
reg = <0xd0070000 0x2500>;
interrupts = <8>;
clocks = <&gate_clk 4>;
+	tx-csum-limit = <9800>;
status = "okay";
phy = <&phy0>;
phy-mode = "rgmii-id";
diff --git a/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
index b38200d2583a..0dfa60d88dd3 100644
--- a/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
@@ -1,7 +1,9 @@
* Temperature Sensor ADC (TSADC) on rockchip SoCs
Required properties:
-- compatible : "rockchip,rk3288-tsadc"
+- compatible : should be "rockchip,<name>-tsadc"
+ "rockchip,rk3288-tsadc": found on RK3288 SoCs
+ "rockchip,rk3368-tsadc": found on RK3368 SoCs
- reg : physical base address of the controller and length of memory mapped
region.
- interrupts : The interrupt number to the cpu. The interrupt specifier format
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 06d443450f21..619af9bfdcb3 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -50,8 +50,7 @@ prototypes:
int (*rename2) (struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
- const char *(*follow_link) (struct dentry *, void **);
- void (*put_link) (struct inode *, void *);
+ const char *(*get_link) (struct dentry *, struct inode *, void **);
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int, unsigned int);
int (*get_acl)(struct inode *, int);
@@ -83,8 +82,7 @@ rmdir: yes (both) (see below)
rename: yes (all) (see below)
rename2: yes (all) (see below)
readlink: no
-follow_link: no
-put_link: no
+get_link: no
setattr: yes
permission: no (may not block if called in rcu-walk mode)
get_acl: no
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index f24d1b833957..0f88e6020487 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -504,3 +504,20 @@ in your dentry operations instead.
[mandatory]
__fd_install() & fd_install() can now sleep. Callers should not
hold a spinlock or other resources that do not allow a schedule.
+--
+[mandatory]
+ any symlink that might use page_follow_link_light/page_put_link() must
+ have inode_nohighmem(inode) called before anything might start playing with
+ its pagecache.
+--
+[mandatory]
+ ->follow_link() is replaced with ->get_link(); same API, except that
+ * ->get_link() gets inode as a separate argument
+ * ->get_link() may be called in RCU mode - in that case NULL
+ dentry is passed
+--
+[mandatory]
+ ->get_link() gets struct delayed_call *done now, and should do
+ set_delayed_call() where it used to set *cookie.
+ ->put_link() is gone - just give the destructor to set_delayed_call()
+ in ->get_link().
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 8c6f07ad373a..b02a7d598258 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -350,8 +350,8 @@ struct inode_operations {
int (*rename2) (struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
- const char *(*follow_link) (struct dentry *, void **);
- void (*put_link) (struct inode *, void *);
+ const char *(*get_link) (struct dentry *, struct inode *,
+ struct delayed_call *);
int (*permission) (struct inode *, int);
int (*get_acl)(struct inode *, int);
int (*setattr) (struct dentry *, struct iattr *);
@@ -434,20 +434,19 @@ otherwise noted.
readlink: called by the readlink(2) system call. Only required if
you want to support reading symbolic links
- follow_link: called by the VFS to follow a symbolic link to the
+ get_link: called by the VFS to follow a symbolic link to the
inode it points to. Only required if you want to support
symbolic links. This method returns the symlink body
to traverse (and possibly resets the current position with
nd_jump_link()). If the body won't go away until the inode
is gone, nothing else is needed; if it needs to be otherwise
- pinned, the data needed to release whatever we'd grabbed
- is to be stored in void * variable passed by address to
- follow_link() instance.
-
- put_link: called by the VFS to release resources allocated by
- follow_link(). The cookie stored by follow_link() is passed
- to this method as the last parameter; only called when
- cookie isn't NULL.
+ pinned, arrange for its release by having get_link(..., ..., done)
+ do set_delayed_call(done, destructor, argument).
+ In that case destructor(argument) will be called once VFS is
+ done with the body you've returned.
+ May be called in RCU mode; that is indicated by NULL dentry
+ argument. If request can't be handled without leaving RCU mode,
+ have it return ERR_PTR(-ECHILD).
permission: called by the VFS to check for access rights on a POSIX-like
filesystem.
diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801
index 6a4b1af724f8..1bba38dd2637 100644
--- a/Documentation/i2c/busses/i2c-i801
+++ b/Documentation/i2c/busses/i2c-i801
@@ -32,6 +32,7 @@ Supported adapters:
* Intel Sunrise Point-LP (PCH)
* Intel DNV (SOC)
* Intel Broxton (SOC)
+ * Intel Lewisburg (PCH)
Datasheets: Publicly available at the Intel website
On Intel Patsburg and later chipsets, both the normal host SMBus controller
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index f8aae632f02f..1a8169ba29e6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -472,6 +472,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Change the amount of debugging information output
when initialising the APIC and IO-APIC components.
+ apic_extnmi= [APIC,X86] External NMI delivery setting
+ Format: { bsp (default) | all | none }
+ bsp: External NMI is delivered only to CPU 0
+ all: External NMIs are broadcast to all CPUs as a
+ backup of CPU 0
+ none: External NMI is masked for all CPUs. This is
+ useful so that a dump capture kernel won't be
+ shot down by NMI
+
autoconf= [IPV6]
See Documentation/networking/ipv6.txt.
@@ -1583,9 +1592,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
hwp_only
Only load intel_pstate on systems which support
hardware P state control (HWP) if available.
- no_acpi
- Don't use ACPI processor performance control objects
- _PSS and _PPC specified limits.
intremap= [X86-64, Intel-IOMMU]
on enable Interrupt Remapping (default)
@@ -3299,18 +3305,35 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
rcutorture.verbose= [KNL]
Enable additional printk() statements.
+ rcupdate.rcu_cpu_stall_suppress= [KNL]
+ Suppress RCU CPU stall warning messages.
+
+ rcupdate.rcu_cpu_stall_timeout= [KNL]
+ Set timeout for RCU CPU stall warning messages.
+
rcupdate.rcu_expedited= [KNL]
Use expedited grace-period primitives, for
example, synchronize_rcu_expedited() instead
of synchronize_rcu(). This reduces latency,
but can increase CPU utilization, degrade
real-time latency, and degrade energy efficiency.
-
- rcupdate.rcu_cpu_stall_suppress= [KNL]
- Suppress RCU CPU stall warning messages.
-
- rcupdate.rcu_cpu_stall_timeout= [KNL]
- Set timeout for RCU CPU stall warning messages.
+ No effect on CONFIG_TINY_RCU kernels.
+
+ rcupdate.rcu_normal= [KNL]
+ Use only normal grace-period primitives,
+ for example, synchronize_rcu() instead of
+ synchronize_rcu_expedited(). This improves
+ real-time latency, CPU utilization, and
+ energy efficiency, but can expose users to
+ increased grace-period latency. This parameter
+ overrides rcupdate.rcu_expedited. No effect on
+ CONFIG_TINY_RCU kernels.
+
+ rcupdate.rcu_normal_after_boot= [KNL]
+ Once boot has completed (that is, after
+ rcu_end_inkernel_boot() has been invoked), use
+ only normal grace-period primitives. No effect
+ on CONFIG_TINY_RCU kernels.
rcupdate.rcu_task_stall_timeout= [KNL]
Set timeout in jiffies for RCU task stall warning
@@ -4117,6 +4140,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
or other driver-specific files in the
Documentation/watchdog/ directory.
+ workqueue.watchdog_thresh=
+ If CONFIG_WQ_WATCHDOG is configured, workqueue can
+			warn about stall conditions and dump internal state to
+ help debugging. 0 disables workqueue stall
+ detection; otherwise, it's the stall threshold
+ duration in seconds. The default value is 30 and
+ it can be updated at runtime by writing to the
+ corresponding sysfs file.
+
workqueue.disable_numa
By default, all work items queued to unbound
workqueues are affine to the NUMA nodes they're
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index aef9487303d0..a61be39c7b51 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -194,7 +194,7 @@ There are some minimal guarantees that may be expected of a CPU:
(*) On any given CPU, dependent memory accesses will be issued in order, with
respect to itself. This means that for:
- WRITE_ONCE(Q, P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
+ Q = READ_ONCE(P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
the CPU will issue the following memory operations:
@@ -202,9 +202,9 @@ There are some minimal guarantees that may be expected of a CPU:
and always in that order. On most systems, smp_read_barrier_depends()
does nothing, but it is required for DEC Alpha. The READ_ONCE()
- and WRITE_ONCE() are required to prevent compiler mischief. Please
- note that you should normally use something like rcu_dereference()
- instead of open-coding smp_read_barrier_depends().
+ is required to prevent compiler mischief. Please note that you
+ should normally use something like rcu_dereference() instead of
+ open-coding smp_read_barrier_depends().
(*) Overlapping loads and stores within a particular CPU will appear to be
ordered within that CPU. This means that for:
@@ -1673,8 +1673,8 @@ There are some more advanced barrier functions:
(*) smp_store_mb(var, value)
This assigns the value to the variable and then inserts a full memory
- barrier after it, depending on the function. It isn't guaranteed to
- insert anything more than a compiler barrier in a UP compilation.
+ barrier after it. It isn't guaranteed to insert anything more than a
+ compiler barrier in a UP compilation.
(*) smp_mb__before_atomic();
diff --git a/Documentation/networking/e100.txt b/Documentation/networking/e100.txt
index f862cf3aff34..42ddbd4b52a9 100644
--- a/Documentation/networking/e100.txt
+++ b/Documentation/networking/e100.txt
@@ -181,17 +181,3 @@ For general information, go to the Intel support website at:
If an issue is identified with the released source code on the supported
kernel with a supported adapter, email the specific information related to the
issue to e1000-devel@lists.sourceforge.net.
-
-
-License
-=======
-
-This software program is released under the terms of a license agreement
-between you ('Licensee') and Intel. Do not use or load this software or any
-associated materials (collectively, the 'Software') until you have carefully
-read the full terms and conditions of the file COPYING located in this software
-package. By loading or using the Software, you agree to the terms of this
-Agreement. If you do not agree with the terms of this Agreement, do not install
-or use the Software.
-
-* Other names and brands may be claimed as the property of others.
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index af70d1541d3a..73c6b1ef0e84 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -551,6 +551,21 @@ the recommended setting is 60.
==============================================================
+panic_on_io_nmi:
+
+Controls the kernel's behavior when a CPU receives an NMI caused by
+an IO error.
+
+0: try to continue operation (default)
+
+1: panic immediately. The IO error triggered an NMI. This indicates a
+ serious system condition which could result in IO data corruption.
+ Rather than continuing, panicking might be a better choice. Some
+ servers issue this sort of NMI when the dump button is pushed,
+ and you can use this option to take a crash dump.
+
+==============================================================
+
panic_on_oops:
Controls the kernel's behaviour when an oops or BUG is encountered.
diff --git a/Documentation/trace/events-msr.txt b/Documentation/trace/events-msr.txt
new file mode 100644
index 000000000000..78c383bf06aa
--- /dev/null
+++ b/Documentation/trace/events-msr.txt
@@ -0,0 +1,37 @@
+
+The x86 kernel supports tracing most MSR (Model Specific Register) accesses.
+To see the definitions of the MSRs on Intel systems, please see the SDM
+at http://www.intel.com/sdm (Volume 3).
+
+Available trace points:
+
+/sys/kernel/debug/tracing/events/msr/
+
+Trace MSR reads
+
+read_msr
+
+msr: MSR number
+val: Value read
+failed: 1 if the access failed, otherwise 0
+
+
+Trace MSR writes
+
+write_msr
+
+msr: MSR number
+val: Value written
+failed: 1 if the access failed, otherwise 0
+
+
+Trace RDPMC in kernel
+
+rdpmc
+
+The trace data can be post-processed with the postprocess/decode_msr.py script:
+
+cat /sys/kernel/debug/tracing/trace | decode_msr.py /usr/src/linux/include/asm/msr-index.h
+
+to add symbolic MSR names.
+
diff --git a/Documentation/trace/postprocess/decode_msr.py b/Documentation/trace/postprocess/decode_msr.py
new file mode 100644
index 000000000000..0ab40e0db580
--- /dev/null
+++ b/Documentation/trace/postprocess/decode_msr.py
@@ -0,0 +1,37 @@
+#!/usr/bin/python
+# add symbolic names to read_msr / write_msr in trace
+# decode_msr msr-index.h < trace
+import sys
+import re
+
+msrs = dict()
+
+with open(sys.argv[1] if len(sys.argv) > 1 else "msr-index.h", "r") as f:
+ for j in f:
+ m = re.match(r'#define (MSR_\w+)\s+(0x[0-9a-fA-F]+)', j)
+ if m:
+ msrs[int(m.group(2), 16)] = m.group(1)
+
+extra_ranges = (
+ ( "MSR_LASTBRANCH_%d_FROM_IP", 0x680, 0x69F ),
+ ( "MSR_LASTBRANCH_%d_TO_IP", 0x6C0, 0x6DF ),
+ ( "LBR_INFO_%d", 0xdc0, 0xddf ),
+)
+
+for j in sys.stdin:
+ m = re.search(r'(read|write)_msr:\s+([0-9a-f]+)', j)
+ if m:
+ r = None
+ num = int(m.group(2), 16)
+ if num in msrs:
+ r = msrs[num]
+ else:
+ for er in extra_ranges:
+ if er[1] <= num <= er[2]:
+ r = er[0] % (num - er[1],)
+ break
+ if r:
+ j = j.replace(" " + m.group(2), " " + r + "(" + m.group(2) + ")")
+ print j,
+
+
diff --git a/MAINTAINERS b/MAINTAINERS
index e9caa4b28828..3cbad0f366f0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -318,7 +318,7 @@ M: Zhang Rui <rui.zhang@intel.com>
L: linux-acpi@vger.kernel.org
W: https://01.org/linux-acpi
S: Supported
-F: drivers/acpi/video.c
+F: drivers/acpi/acpi_video.c
ACPI WMI DRIVER
L: platform-driver-x86@vger.kernel.org
@@ -1847,7 +1847,7 @@ S: Supported
F: drivers/net/wireless/ath/ath6kl/
WILOCITY WIL6210 WIRELESS DRIVER
-M: Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com>
+M: Maya Erez <qca_merez@qca.qualcomm.com>
L: linux-wireless@vger.kernel.org
L: wil6210@qca.qualcomm.com
S: Supported
@@ -1931,7 +1931,7 @@ S: Supported
F: drivers/i2c/busses/i2c-at91.c
ATMEL ISI DRIVER
-M: Josh Wu <josh.wu@atmel.com>
+M: Ludovic Desroches <ludovic.desroches@atmel.com>
L: linux-media@vger.kernel.org
S: Supported
F: drivers/media/platform/soc_camera/atmel-isi.c
@@ -1950,7 +1950,8 @@ S: Supported
F: drivers/net/ethernet/cadence/
ATMEL NAND DRIVER
-M: Josh Wu <josh.wu@atmel.com>
+M: Wenyou Yang <wenyou.yang@atmel.com>
+M: Josh Wu <rainyfeeling@outlook.com>
L: linux-mtd@lists.infradead.org
S: Supported
F: drivers/mtd/nand/atmel_nand*
@@ -2449,7 +2450,9 @@ F: drivers/firmware/broadcom/*
BROADCOM STB NAND FLASH DRIVER
M: Brian Norris <computersforpeace@gmail.com>
+M: Kamal Dasu <kdasu.kdev@gmail.com>
L: linux-mtd@lists.infradead.org
+L: bcm-kernel-feedback-list@broadcom.com
S: Maintained
F: drivers/mtd/nand/brcmnand/
@@ -2546,7 +2549,7 @@ F: arch/c6x/
CACHEFILES: FS-CACHE BACKEND FOR CACHING ON MOUNTED FILESYSTEMS
M: David Howells <dhowells@redhat.com>
-L: linux-cachefs@redhat.com
+L: linux-cachefs@redhat.com (moderated for non-subscribers)
S: Supported
F: Documentation/filesystems/caching/cachefiles.txt
F: fs/cachefiles/
@@ -2929,10 +2932,9 @@ S: Maintained
F: drivers/platform/x86/compal-laptop.c
CONEXANT ACCESSRUNNER USB DRIVER
-M: Simon Arlott <cxacru@fire.lp0.eu>
L: accessrunner-general@lists.sourceforge.net
W: http://accessrunner.sourceforge.net/
-S: Maintained
+S: Orphan
F: drivers/usb/atm/cxacru.c
CONFIGFS
@@ -2973,6 +2975,7 @@ F: kernel/cpuset.c
CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
M: Johannes Weiner <hannes@cmpxchg.org>
M: Michal Hocko <mhocko@kernel.org>
+M: Vladimir Davydov <vdavydov@virtuozzo.com>
L: cgroups@vger.kernel.org
L: linux-mm@kvack.org
S: Maintained
@@ -4409,6 +4412,7 @@ K: fmc_d.*register
FPGA MANAGER FRAMEWORK
M: Alan Tull <atull@opensource.altera.com>
+R: Moritz Fischer <moritz.fischer@ettus.com>
S: Maintained
F: drivers/fpga/
F: include/linux/fpga/fpga-mgr.h
@@ -4559,7 +4563,7 @@ F: include/linux/frontswap.h
FS-CACHE: LOCAL CACHING FOR NETWORK FILESYSTEMS
M: David Howells <dhowells@redhat.com>
-L: linux-cachefs@redhat.com
+L: linux-cachefs@redhat.com (moderated for non-subscribers)
S: Supported
F: Documentation/filesystems/caching/
F: fs/fscache/
@@ -5574,7 +5578,7 @@ R: Jesse Brandeburg <jesse.brandeburg@intel.com>
R: Shannon Nelson <shannon.nelson@intel.com>
R: Carolyn Wyborny <carolyn.wyborny@intel.com>
R: Don Skidmore <donald.c.skidmore@intel.com>
-R: Matthew Vick <matthew.vick@intel.com>
+R: Bruce Allan <bruce.w.allan@intel.com>
R: John Ronciak <john.ronciak@intel.com>
R: Mitch Williams <mitch.a.williams@intel.com>
L: intel-wired-lan@lists.osuosl.org
@@ -5618,9 +5622,7 @@ F: Documentation/trace/intel_th.txt
F: drivers/hwtracing/intel_th/
INTEL(R) TRUSTED EXECUTION TECHNOLOGY (TXT)
-M: Richard L Maliszewski <richard.l.maliszewski@intel.com>
-M: Gang Wei <gang.wei@intel.com>
-M: Shane Wang <shane.wang@intel.com>
+M: Ning Sun <ning.sun@intel.com>
L: tboot-devel@lists.sourceforge.net
W: http://tboot.sourceforge.net
T: hg http://tboot.hg.sourceforge.net:8000/hgroot/tboot/tboot
@@ -5711,13 +5713,6 @@ M: Juanjo Ciarlante <jjciarla@raiz.uncu.edu.ar>
S: Maintained
F: net/ipv4/netfilter/ipt_MASQUERADE.c
-IP1000A 10/100/1000 GIGABIT ETHERNET DRIVER
-M: Francois Romieu <romieu@fr.zoreil.com>
-M: Sorbica Shieh <sorbica@icplus.com.tw>
-L: netdev@vger.kernel.org
-S: Maintained
-F: drivers/net/ethernet/icplus/ipg.*
-
IPATH DRIVER
M: Mike Marciniszyn <infinipath@intel.com>
L: linux-rdma@vger.kernel.org
@@ -6371,6 +6366,7 @@ F: arch/*/include/asm/pmem.h
LIGHTNVM PLATFORM SUPPORT
M: Matias Bjorling <mb@lightnvm.io>
W: http://github/OpenChannelSSD
+L: linux-block@vger.kernel.org
S: Maintained
F: drivers/lightnvm/
F: include/linux/lightnvm.h
@@ -6923,13 +6919,21 @@ F: drivers/scsi/megaraid.*
F: drivers/scsi/megaraid/
MELLANOX ETHERNET DRIVER (mlx4_en)
-M: Amir Vadai <amirv@mellanox.com>
+M: Eugenia Emantayev <eugenia@mellanox.com>
L: netdev@vger.kernel.org
S: Supported
W: http://www.mellanox.com
Q: http://patchwork.ozlabs.org/project/netdev/list/
F: drivers/net/ethernet/mellanox/mlx4/en_*
+MELLANOX ETHERNET DRIVER (mlx5e)
+M: Saeed Mahameed <saeedm@mellanox.com>
+L: netdev@vger.kernel.org
+S: Supported
+W: http://www.mellanox.com
+Q: http://patchwork.ozlabs.org/project/netdev/list/
+F: drivers/net/ethernet/mellanox/mlx5/core/en_*
+
MELLANOX ETHERNET SWITCH DRIVERS
M: Jiri Pirko <jiri@mellanox.com>
M: Ido Schimmel <idosch@mellanox.com>
@@ -7901,6 +7905,18 @@ S: Maintained
F: net/openvswitch/
F: include/uapi/linux/openvswitch.h
+OPERATING PERFORMANCE POINTS (OPP)
+M: Viresh Kumar <vireshk@kernel.org>
+M: Nishanth Menon <nm@ti.com>
+M: Stephen Boyd <sboyd@codeaurora.org>
+L: linux-pm@vger.kernel.org
+S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git
+F: drivers/base/power/opp/
+F: include/linux/pm_opp.h
+F: Documentation/power/opp.txt
+F: Documentation/devicetree/bindings/opp/
+
OPL4 DRIVER
M: Clemens Ladisch <clemens@ladisch.de>
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
@@ -8269,7 +8285,7 @@ F: include/linux/delayacct.h
F: kernel/delayacct.c
PERFORMANCE EVENTS SUBSYSTEM
-M: Peter Zijlstra <a.p.zijlstra@chello.nl>
+M: Peter Zijlstra <peterz@infradead.org>
M: Ingo Molnar <mingo@redhat.com>
M: Arnaldo Carvalho de Melo <acme@kernel.org>
L: linux-kernel@vger.kernel.org
@@ -8362,6 +8378,14 @@ L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
S: Maintained
F: drivers/pinctrl/samsung/
+PIN CONTROLLER - SINGLE
+M: Tony Lindgren <tony@atomide.com>
+M: Haojian Zhuang <haojian.zhuang@linaro.org>
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L: linux-omap@vger.kernel.org
+S: Maintained
+F: drivers/pinctrl/pinctrl-single.c
+
PIN CONTROLLER - ST SPEAR
M: Viresh Kumar <vireshk@kernel.org>
L: spear-devel@list.st.com
@@ -8928,6 +8952,13 @@ F: drivers/rpmsg/
F: Documentation/rpmsg.txt
F: include/linux/rpmsg.h
+RENESAS ETHERNET DRIVERS
+R: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
+L: netdev@vger.kernel.org
+L: linux-sh@vger.kernel.org
+F: drivers/net/ethernet/renesas/
+F: include/linux/sh_eth.h
+
RESET CONTROLLER FRAMEWORK
M: Philipp Zabel <p.zabel@pengutronix.de>
S: Maintained
@@ -9314,7 +9345,6 @@ F: drivers/i2c/busses/i2c-designware-*
F: include/linux/platform_data/i2c-designware.h
SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER
-M: Seungwon Jeon <tgih.jun@samsung.com>
M: Jaehoon Chung <jh80.chung@samsung.com>
L: linux-mmc@vger.kernel.org
S: Maintained
@@ -9334,7 +9364,7 @@ M: Andreas Noever <andreas.noever@gmail.com>
S: Maintained
F: drivers/thunderbolt/
-TIMEKEEPING, CLOCKSOURCE CORE, NTP
+TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
M: John Stultz <john.stultz@linaro.org>
M: Thomas Gleixner <tglx@linutronix.de>
L: linux-kernel@vger.kernel.org
@@ -9347,6 +9377,7 @@ F: include/uapi/linux/time.h
F: include/uapi/linux/timex.h
F: kernel/time/clocksource.c
F: kernel/time/time*.c
+F: kernel/time/alarmtimer.c
F: kernel/time/ntp.c
F: tools/testing/selftests/timers/
@@ -9411,8 +9442,10 @@ F: include/scsi/sg.h
SCSI SUBSYSTEM
M: "James E.J. Bottomley" <JBottomley@odin.com>
-L: linux-scsi@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git
+M: "Martin K. Petersen" <martin.petersen@oracle.com>
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git
+L: linux-scsi@vger.kernel.org
S: Maintained
F: drivers/scsi/
F: include/scsi/
@@ -10887,9 +10920,9 @@ S: Maintained
F: drivers/media/tuners/tua9001*
TULIP NETWORK DRIVERS
-M: Grant Grundler <grundler@parisc-linux.org>
L: netdev@vger.kernel.org
-S: Maintained
+L: linux-parisc@vger.kernel.org
+S: Orphan
F: drivers/net/ethernet/dec/tulip/
TUN/TAP driver
diff --git a/Makefile b/Makefile
index 3a0234f50f36..70dea02f1346 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 4
PATCHLEVEL = 4
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION =
NAME = Blurry Fish Butt
# *DOCUMENTATION*
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 2c2ac3f3ff80..6312f607932f 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -445,6 +445,7 @@ config LINUX_LINK_BASE
However some customers have peripherals mapped at this addr, so
Linux needs to be scooted a bit.
If you don't know what the above means, leave this setting alone.
+	  This needs to match the memory start address specified in the Device Tree.
config HIGHMEM
bool "High Memory Support"
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index cf0cf34eeb24..aeb19021099e 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -81,7 +81,7 @@ endif
LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name)
# Modules with short calls might break for calls into builtin-kernel
-KBUILD_CFLAGS_MODULE += -mlong-calls
+KBUILD_CFLAGS_MODULE += -mlong-calls -mno-millicode
# Finally dump everything into kernel build system
KBUILD_CFLAGS += $(cflags-y)
diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index f3db32154973..44a578c10732 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -46,6 +46,7 @@
snps,pbl = < 32 >;
clocks = <&apbclk>;
clock-names = "stmmaceth";
+ max-speed = <100>;
};
ehci@0x40000 {
diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts
index b0eb0e7fe21d..fc81879bc1f5 100644
--- a/arch/arc/boot/dts/nsim_hs.dts
+++ b/arch/arc/boot/dts/nsim_hs.dts
@@ -17,7 +17,8 @@
memory {
device_type = "memory";
- reg = <0x0 0x80000000 0x0 0x40000000 /* 1 GB low mem */
+ /* CONFIG_LINUX_LINK_BASE needs to match low mem start */
+ reg = <0x0 0x80000000 0x0 0x20000000 /* 512 MB low mem */
0x1 0x00000000 0x0 0x40000000>; /* 1 GB highmem */
};
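Decoding the cells above (two address cells and two size cells per entry): low memory now starts at 0x8000_0000 with size 0x2000_0000, i.e. 512 MB instead of the previous 1 GB, while highmem stays at 0x1_0000_0000 with size 0x4000_0000 (1 GB). Per the new comment, that 0x8000_0000 base is exactly the value CONFIG_LINUX_LINK_BASE has to match, which the mm/init.c change further below now enforces with a descriptive panic rather than a bare BUG_ON.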
diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index c92c0ef1e9d2..f1ac9818b751 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index cfac24e0e7b6..323486d6ee83 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index 9922a118a15a..66191cd0447e 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
index f761a7c70761..f68838e8068a 100644
--- a/arch/arc/configs/nsim_hs_defconfig
+++ b/arch/arc/configs/nsim_hs_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
index dc6f74f41283..96bd1c20fb0b 100644
--- a/arch/arc/configs/nsim_hs_smp_defconfig
+++ b/arch/arc/configs/nsim_hs_smp_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index 3fef0a210c56..fcae66683ca0 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index 51784837daae..b01b659168ea 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig
index ef35ef3923dd..a07f20de221b 100644
--- a/arch/arc/configs/vdk_hs38_defconfig
+++ b/arch/arc/configs/vdk_hs38_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_CROSS_MEMORY_ATTACH is not set
diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig
index 634509e5e572..f36c047b33ca 100644
--- a/arch/arc/configs/vdk_hs38_smp_defconfig
+++ b/arch/arc/configs/vdk_hs38_smp_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_CROSS_MEMORY_ATTACH is not set
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index abf06e81c929..210ef3e72332 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -62,9 +62,7 @@ extern int ioc_exists;
#define ARC_REG_IC_IVIC 0x10
#define ARC_REG_IC_CTRL 0x11
#define ARC_REG_IC_IVIL 0x19
-#if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4)
#define ARC_REG_IC_PTAG 0x1E
-#endif
#define ARC_REG_IC_PTAG_HI 0x1F
/* Bit val in IC_CTRL */
diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h
index ad481c24070d..258b0e5ad332 100644
--- a/arch/arc/include/asm/irqflags-arcv2.h
+++ b/arch/arc/include/asm/irqflags-arcv2.h
@@ -37,6 +37,9 @@
#define ISA_INIT_STATUS_BITS (STATUS_IE_MASK | STATUS_AD_MASK | \
(ARCV2_IRQ_DEF_PRIO << 1))
+/* SLEEP needs default irq priority (<=) which can interrupt the doze */
+#define ISA_SLEEP_ARG (0x10 | ARCV2_IRQ_DEF_PRIO)
+
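Reading the new constant — an interpretation drawn only from the comments in this patch, not from an ISA spec quote: bit 4 (0x10) asks SLEEP to re-enable interrupts while dozing, and the low bits carry the priority threshold (ARCV2_IRQ_DEF_PRIO here), so any interrupt at the default priority or more urgent can end the doze. The ARCompact variant below encodes the equivalent behaviour as a flat 0x3.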
#ifndef __ASSEMBLY__
/*
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
index d8c608174617..c1d36458bfb7 100644
--- a/arch/arc/include/asm/irqflags-compact.h
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -43,6 +43,8 @@
#define ISA_INIT_STATUS_BITS STATUS_IE_MASK
+#define ISA_SLEEP_ARG 0x3
+
#ifndef __ASSEMBLY__
/******************************************************************
diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h
index 6ff657a904b6..c28e6c347b49 100644
--- a/arch/arc/include/asm/mach_desc.h
+++ b/arch/arc/include/asm/mach_desc.h
@@ -23,7 +23,7 @@
* @dt_compat: Array of device tree 'compatible' strings
* (XXX: although only 1st entry is looked at)
* @init_early: Very early callback [called from setup_arch()]
- * @init_cpu_smp: for each CPU as it is coming up (SMP as well as UP)
+ * @init_per_cpu: for each CPU as it is coming up (SMP as well as UP)
* [(M):init_IRQ(), (o):start_kernel_secondary()]
* @init_machine: arch initcall level callback (e.g. populate static
* platform devices or parse Devicetree)
@@ -35,7 +35,7 @@ struct machine_desc {
const char **dt_compat;
void (*init_early)(void);
#ifdef CONFIG_SMP
- void (*init_cpu_smp)(unsigned int);
+ void (*init_per_cpu)(unsigned int);
#endif
void (*init_machine)(void);
void (*init_late)(void);
diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h
index 133c867d15af..991380438d6b 100644
--- a/arch/arc/include/asm/smp.h
+++ b/arch/arc/include/asm/smp.h
@@ -48,7 +48,7 @@ extern int smp_ipi_irq_setup(int cpu, int irq);
* @init_early_smp:	An SMP-specific h/w block can init itself
* Could be common across platforms so not covered by
* mach_desc->init_early()
- * @init_irq_cpu: Called for each core so SMP h/w block driver can do
+ * @init_per_cpu: Called for each core so SMP h/w block driver can do
* any needed setup per cpu (e.g. IPI request)
* @cpu_kick: For Master to kickstart a cpu (optionally at a PC)
* @ipi_send: To send IPI to a @cpu
@@ -57,7 +57,7 @@ extern int smp_ipi_irq_setup(int cpu, int irq);
struct plat_smp_ops {
const char *info;
void (*init_early_smp)(void);
- void (*init_irq_cpu)(int cpu);
+ void (*init_per_cpu)(int cpu);
void (*cpu_kick)(int cpu, unsigned long pc);
void (*ipi_send)(int cpu);
void (*ipi_clear)(int irq);
diff --git a/arch/arc/include/asm/unwind.h b/arch/arc/include/asm/unwind.h
index 7ca628b6ee2a..c11a25bb8158 100644
--- a/arch/arc/include/asm/unwind.h
+++ b/arch/arc/include/asm/unwind.h
@@ -112,7 +112,6 @@ struct unwind_frame_info {
extern int arc_unwind(struct unwind_frame_info *frame);
extern void arc_unwind_init(void);
-extern void arc_unwind_setup(void);
extern void *unwind_add_table(struct module *module, const void *table_start,
unsigned long table_size);
extern void unwind_remove_table(void *handle, int init_only);
@@ -152,9 +151,6 @@ static inline void arc_unwind_init(void)
{
}
-static inline void arc_unwind_setup(void)
-{
-}
#define unwind_add_table(a, b, c)
#define unwind_remove_table(a, b)
diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c
index c14a5bea0c76..5d446df2c413 100644
--- a/arch/arc/kernel/ctx_sw.c
+++ b/arch/arc/kernel/ctx_sw.c
@@ -58,8 +58,6 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
"st sp, [r24] \n\t"
#endif
- "sync \n\t"
-
/*
* setup _current_task with incoming tsk.
* optionally, set r25 to that as well
diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S
index e248594097e7..e6890b1f8650 100644
--- a/arch/arc/kernel/ctx_sw_asm.S
+++ b/arch/arc/kernel/ctx_sw_asm.S
@@ -44,9 +44,6 @@ __switch_to:
* don't need to do anything special to return it
*/
- /* hardware memory barrier */
- sync
-
/*
* switch to new task, contained in r1
* Temp reg r3 is required to get the ptr to store val
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
index 26c156827479..0394f9f61b46 100644
--- a/arch/arc/kernel/intc-arcv2.c
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -106,10 +106,21 @@ static struct irq_chip arcv2_irq_chip = {
static int arcv2_irq_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hw)
{
- if (irq == TIMER0_IRQ || irq == IPI_IRQ)
+ /*
+ * core intc IRQs [16, 23]:
+	 * Statically assigned, always private per core (Timers, WDT, IPI, PCT)
+ */
+ if (hw < 24) {
+ /*
+		 * A subsequent request_percpu_irq() fails if percpu_devid is
+		 * not set. Setting it in turn sets NOAUTOEN, meaning each core
+		 * needs to call enable_percpu_irq()
+ */
+ irq_set_percpu_devid(irq);
irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_percpu_irq);
- else
+ } else {
irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_level_irq);
+ }
return 0;
}
diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index 2ee226546c6a..ba17f85285cf 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -29,11 +29,11 @@ void __init init_IRQ(void)
#ifdef CONFIG_SMP
	/* an SMP H/w block could do IPI IRQ request here */
- if (plat_smp_ops.init_irq_cpu)
- plat_smp_ops.init_irq_cpu(smp_processor_id());
+ if (plat_smp_ops.init_per_cpu)
+ plat_smp_ops.init_per_cpu(smp_processor_id());
- if (machine_desc->init_cpu_smp)
- machine_desc->init_cpu_smp(smp_processor_id());
+ if (machine_desc->init_per_cpu)
+ machine_desc->init_per_cpu(smp_processor_id());
#endif
}
@@ -51,6 +51,18 @@ void arch_do_IRQ(unsigned int irq, struct pt_regs *regs)
set_irq_regs(old_regs);
}
+/*
+ * API called for requesting percpu interrupts - called by each CPU
+ *  - For the boot CPU, actually requests the IRQ with the genirq core and
+ *    enables it
+ *  - For subsequent callers, only the local enable is done
+ *
+ * Relies on being called by the boot cpu first (i.e. request issued ahead of
+ * any enable), as expected by genirq. Hence suitable only for TIMER and IPI,
+ * which are guaranteed to be set up on the boot core first.
+ * Late-probed peripherals such as perf can't use this as there is no
+ * guarantee of being called on the boot CPU first.
+ */
+
void arc_request_percpu_irq(int irq, int cpu,
irqreturn_t (*isr)(int irq, void *dev),
const char *irq_nm,
@@ -60,14 +72,17 @@ void arc_request_percpu_irq(int irq, int cpu,
if (!cpu) {
int rc;
+#ifdef CONFIG_ISA_ARCOMPACT
/*
- * These 2 calls are essential to making percpu IRQ APIs work
- * Ideally these details could be hidden in irq chip map function
- * but the issue is IPIs IRQs being static (non-DT) and platform
- * specific, so we can't identify them there.
+		 * A subsequent request_percpu_irq() fails if percpu_devid is
+		 * not set. Setting it in turn sets NOAUTOEN, meaning each core
+		 * needs to call enable_percpu_irq()
+		 *
+		 * For ARCv2, this is done in the irq map function since we
+		 * know which irqs are strictly per-cpu
*/
irq_set_percpu_devid(irq);
- irq_modify_status(irq, IRQ_NOAUTOEN, 0); /* @irq, @clr, @set */
+#endif
rc = request_percpu_irq(irq, isr, irq_nm, percpu_dev);
if (rc)
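The comment above describes a two-phase protocol: the boot CPU does the one-time genirq registration, then every CPU (boot CPU included) enables its own per-cpu copy. A minimal sketch of that shape against the stock genirq API — foo_isr, foo_dev and foo_setup_percpu_irq are hypothetical names, not taken from this patch:

	#include <linux/interrupt.h>
	#include <linux/irq.h>
	#include <linux/percpu.h>

	static DEFINE_PER_CPU(int, foo_dev);

	static irqreturn_t foo_isr(int irq, void *dev)
	{
		/* runs on whichever CPU took the interrupt */
		return IRQ_HANDLED;
	}

	static void foo_setup_percpu_irq(int irq, int cpu)
	{
		if (!cpu)
			/* boot CPU only: one-time registration with genirq */
			request_percpu_irq(irq, foo_isr, "foo", &foo_dev);

		/* every CPU must clear NOAUTOEN for its own copy */
		enable_percpu_irq(irq, IRQ_TYPE_NONE);
	}

The ordering constraint is exactly the one the comment calls out: if a secondary core reaches the enable before the boot core has issued the request, genirq rejects it.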
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index 74a9b074ac3e..bd237acdf4f2 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -132,7 +132,7 @@ static void mcip_probe_n_setup(void)
struct plat_smp_ops plat_smp_ops = {
.info = smp_cpuinfo_buf,
.init_early_smp = mcip_probe_n_setup,
- .init_irq_cpu = mcip_setup_per_cpu,
+ .init_per_cpu = mcip_setup_per_cpu,
.ipi_send = mcip_ipi_send,
.ipi_clear = mcip_ipi_clear,
};
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 0c08bb1ce15a..8b134cfe5e1f 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -428,12 +428,11 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
#endif /* CONFIG_ISA_ARCV2 */
-void arc_cpu_pmu_irq_init(void)
+static void arc_cpu_pmu_irq_init(void *data)
{
- struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
+ int irq = *(int *)data;
- arc_request_percpu_irq(arc_pmu->irq, smp_processor_id(), arc_pmu_intr,
- "ARC perf counters", pmu_cpu);
+ enable_percpu_irq(irq, IRQ_TYPE_NONE);
/* Clear all pending interrupt flags */
write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
@@ -515,7 +514,6 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
if (has_interrupts) {
int irq = platform_get_irq(pdev, 0);
- unsigned long flags;
if (irq < 0) {
pr_err("Cannot get IRQ number for the platform\n");
@@ -524,24 +522,12 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
arc_pmu->irq = irq;
- /*
- * arc_cpu_pmu_irq_init() needs to be called on all cores for
- * their respective local PMU.
- * However we use opencoded on_each_cpu() to ensure it is called
- * on core0 first, so that arc_request_percpu_irq() sets up
- * AUTOEN etc. Otherwise enable_percpu_irq() fails to enable
- * perf IRQ on non master cores.
- * see arc_request_percpu_irq()
- */
- preempt_disable();
- local_irq_save(flags);
- arc_cpu_pmu_irq_init();
- local_irq_restore(flags);
- smp_call_function((smp_call_func_t)arc_cpu_pmu_irq_init, 0, 1);
- preempt_enable();
-
- /* Clean all pending interrupt flags */
- write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
+		/* intc map function ensures irq_set_percpu_devid() is called */
+ request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters",
+ this_cpu_ptr(&arc_pmu_cpu));
+
+ on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1);
+
} else
arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
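With percpu_devid now set in the intc map function, the probe no longer needs the open-coded boot-CPU-first dance: request_percpu_irq() runs once on whichever CPU probes the PMU, and on_each_cpu() performs the local enable on every online CPU, caller included, with interrupts disabled for the duration of each callback. A sketch of that second half, with foo_enable_one as an illustrative name:

	static void foo_enable_one(void *data)
	{
		/* invoked on each online CPU; *data is the IRQ number */
		enable_percpu_irq(*(int *)data, IRQ_TYPE_NONE);
	}

	/* wait=1: returns only after every CPU has run the callback */
	on_each_cpu(foo_enable_one, &irq, 1);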
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index 91d5a0f1f3f7..a3f750e76b68 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -44,11 +44,10 @@ SYSCALL_DEFINE0(arc_gettls)
void arch_cpu_idle(void)
{
/* sleep, but enable all interrupts before committing */
- if (is_isa_arcompact()) {
- __asm__("sleep 0x3");
- } else {
- __asm__("sleep 0x10");
- }
+ __asm__ __volatile__(
+ "sleep %0 \n"
+ :
+			:"I"(ISA_SLEEP_ARG)); /* can't be "r": has to be an embedded const */
}
asmlinkage void ret_from_fork(void);
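The "I" constraint tells the compiler the operand must be an integer constant suitable for the instruction's immediate field, so ISA_SLEEP_ARG is baked straight into the SLEEP opcode; an "r" constraint would route it through a register, which this operand encoding cannot take, per the comment above. A standalone sketch with an illustrative constant:

	#define FOO_SLEEP_ARG	0x10	/* hypothetical value */

	static inline void foo_idle(void)
	{
		/* the constant is embedded in the instruction at build time */
		__asm__ __volatile__("sleep %0	\n" : : "I"(FOO_SLEEP_ARG));
	}

Because "I" demands a compile-time constant, a runtime variable here would fail to compile, which is why ISA_SLEEP_ARG is a per-ISA macro rather than a variable.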
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index c33e77c0ad3e..e1b87444ea9a 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -429,7 +429,6 @@ void __init setup_arch(char **cmdline_p)
#endif
arc_unwind_init();
- arc_unwind_setup();
}
static int __init customize_machine(void)
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 580587805fa3..ef6e9e15b82a 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -132,11 +132,11 @@ void start_kernel_secondary(void)
pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu);
/* Some SMP H/w setup - for each cpu */
- if (plat_smp_ops.init_irq_cpu)
- plat_smp_ops.init_irq_cpu(cpu);
+ if (plat_smp_ops.init_per_cpu)
+ plat_smp_ops.init_per_cpu(cpu);
- if (machine_desc->init_cpu_smp)
- machine_desc->init_cpu_smp(cpu);
+ if (machine_desc->init_per_cpu)
+ machine_desc->init_per_cpu(cpu);
arc_local_timer_setup();
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index 93c6ea52b671..5eb707640e9c 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -170,6 +170,23 @@ static struct unwind_table *find_table(unsigned long pc)
static unsigned long read_pointer(const u8 **pLoc,
const void *end, signed ptrType);
+static void init_unwind_hdr(struct unwind_table *table,
+ void *(*alloc) (unsigned long));
+
+/*
+ * wrappers for the header alloc (rather than choosing between the two
+ * at each call site), to avoid section mismatch warnings
+ */
+static void *__init unw_hdr_alloc_early(unsigned long sz)
+{
+ return __alloc_bootmem_nopanic(sz, sizeof(unsigned int),
+ MAX_DMA_ADDRESS);
+}
+
+static void *unw_hdr_alloc(unsigned long sz)
+{
+ return kmalloc(sz, GFP_KERNEL);
+}
static void init_unwind_table(struct unwind_table *table, const char *name,
const void *core_start, unsigned long core_size,
@@ -209,6 +226,8 @@ void __init arc_unwind_init(void)
__start_unwind, __end_unwind - __start_unwind,
NULL, 0);
/*__start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);*/
+
+ init_unwind_hdr(&root_table, unw_hdr_alloc_early);
}
static const u32 bad_cie, not_fde;
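init_unwind_hdr() now serves two allocation regimes: bootmem for the kernel's own tables, before the slab allocator is up, and kmalloc for module tables at runtime. Injecting the allocator as a callback keeps the __init-only bootmem path out of the resident module path. A sketch of the pattern, with foo_build as a hypothetical consumer:

	static void foo_build(struct unwind_table *table,
			      void *(*alloc)(unsigned long))
	{
		/* early boot passes a bootmem wrapper, module load a kmalloc one */
		void *hdr = alloc(64);

		if (!hdr)
			return;
		/* ... populate hdr, then publish it via table->header ... */
	}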
@@ -241,8 +260,8 @@ static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
e2->fde = v;
}
-static void __init setup_unwind_table(struct unwind_table *table,
- void *(*alloc) (unsigned long))
+static void init_unwind_hdr(struct unwind_table *table,
+ void *(*alloc) (unsigned long))
{
const u8 *ptr;
unsigned long tableSize = table->size, hdrSize;
@@ -277,10 +296,10 @@ static void __init setup_unwind_table(struct unwind_table *table,
if (cie == &not_fde)
continue;
if (cie == NULL || cie == &bad_cie)
- return;
+ goto ret_err;
ptrType = fde_pointer_type(cie);
if (ptrType < 0)
- return;
+ goto ret_err;
ptr = (const u8 *)(fde + 2);
if (!read_pointer(&ptr, (const u8 *)(fde + 1) + *fde,
@@ -296,13 +315,15 @@ static void __init setup_unwind_table(struct unwind_table *table,
}
if (tableSize || !n)
- return;
+ goto ret_err;
hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
+ 2 * n * sizeof(unsigned long);
+
header = alloc(hdrSize);
if (!header)
- return;
+ goto ret_err;
+
header->version = 1;
header->eh_frame_ptr_enc = DW_EH_PE_abs | DW_EH_PE_native;
header->fde_count_enc = DW_EH_PE_abs | DW_EH_PE_data4;
@@ -340,18 +361,10 @@ static void __init setup_unwind_table(struct unwind_table *table,
table->hdrsz = hdrSize;
smp_wmb();
table->header = (const void *)header;
-}
-
-static void *__init balloc(unsigned long sz)
-{
- return __alloc_bootmem_nopanic(sz,
- sizeof(unsigned int),
- __pa(MAX_DMA_ADDRESS));
-}
+ return;
-void __init arc_unwind_setup(void)
-{
- setup_unwind_table(&root_table, balloc);
+ret_err:
+	panic("Attention !!! Dwarf FDE parsing errors\n");
}
#ifdef CONFIG_MODULES
@@ -377,6 +390,8 @@ void *unwind_add_table(struct module *module, const void *table_start,
table_start, table_size,
NULL, 0);
+ init_unwind_hdr(table, unw_hdr_alloc);
+
#ifdef UNWIND_DEBUG
unw_debug("Table added for [%s] %lx %lx\n",
module->name, table->core.pc, table->core.range);
@@ -439,6 +454,7 @@ void unwind_remove_table(void *handle, int init_only)
info.init_only = init_only;
unlink_table(&info); /* XXX: SMP */
+ kfree(table->header);
kfree(table);
}
@@ -588,9 +604,6 @@ static signed fde_pointer_type(const u32 *cie)
const u8 *ptr = (const u8 *)(cie + 2);
unsigned version = *ptr;
- if (version != 1)
- return -1; /* unsupported */
-
if (*++ptr) {
const char *aug;
const u8 *end = (const u8 *)(cie + 1) + *cie;
@@ -986,42 +999,13 @@ int arc_unwind(struct unwind_frame_info *frame)
(const u8 *)(fde +
1) +
*fde, ptrType);
- if (pc >= endLoc)
+ if (pc >= endLoc) {
fde = NULL;
- } else
- fde = NULL;
- }
- if (fde == NULL) {
- for (fde = table->address, tableSize = table->size;
- cie = NULL, tableSize > sizeof(*fde)
- && tableSize - sizeof(*fde) >= *fde;
- tableSize -= sizeof(*fde) + *fde,
- fde += 1 + *fde / sizeof(*fde)) {
- cie = cie_for_fde(fde, table);
- if (cie == &bad_cie) {
cie = NULL;
- break;
}
- if (cie == NULL
- || cie == &not_fde
- || (ptrType = fde_pointer_type(cie)) < 0)
- continue;
- ptr = (const u8 *)(fde + 2);
- startLoc = read_pointer(&ptr,
- (const u8 *)(fde + 1) +
- *fde, ptrType);
- if (!startLoc)
- continue;
- if (!(ptrType & DW_EH_PE_indirect))
- ptrType &=
- DW_EH_PE_FORM | DW_EH_PE_signed;
- endLoc =
- startLoc + read_pointer(&ptr,
- (const u8 *)(fde +
- 1) +
- *fde, ptrType);
- if (pc >= startLoc && pc < endLoc)
- break;
+ } else {
+ fde = NULL;
+ cie = NULL;
}
}
}
@@ -1031,9 +1015,7 @@ int arc_unwind(struct unwind_frame_info *frame)
ptr = (const u8 *)(cie + 2);
end = (const u8 *)(cie + 1) + *cie;
frame->call_frame = 1;
- if ((state.version = *ptr) != 1)
- cie = NULL; /* unsupported version */
- else if (*++ptr) {
+ if (*++ptr) {
/* check if augmentation size is first (thus present) */
if (*ptr == 'z') {
while (++ptr < end && *ptr) {
diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c
index 065ee6bfa82a..92dd92cad7f9 100644
--- a/arch/arc/mm/highmem.c
+++ b/arch/arc/mm/highmem.c
@@ -111,7 +111,7 @@ void __kunmap_atomic(void *kv)
}
EXPORT_SYMBOL(__kunmap_atomic);
-noinline pte_t *alloc_kmap_pgtable(unsigned long kvaddr)
+static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr)
{
pgd_t *pgd_k;
pud_t *pud_k;
@@ -127,7 +127,7 @@ noinline pte_t *alloc_kmap_pgtable(unsigned long kvaddr)
return pte_k;
}
-void kmap_init(void)
+void __init kmap_init(void)
{
/* Due to recursive include hell, we can't do this in processor.h */
BUILD_BUG_ON(PAGE_OFFSET < (VMALLOC_END + FIXMAP_SIZE + PKMAP_SIZE));
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index a9305b5a2cd4..7d2c4fbf4f22 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -51,7 +51,9 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
int in_use = 0;
if (!low_mem_sz) {
- BUG_ON(base != low_mem_start);
+ if (base != low_mem_start)
+ panic("CONFIG_LINUX_LINK_BASE != DT memory { }");
+
low_mem_sz = size;
in_use = 1;
} else {
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 0ee739846847..daf2bf52b984 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -619,10 +619,10 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
int dirty = !test_and_set_bit(PG_dc_clean, &page->flags);
if (dirty) {
- /* wback + inv dcache lines */
+ /* wback + inv dcache lines (K-mapping) */
__flush_dcache_page(paddr, paddr);
- /* invalidate any existing icache lines */
+ /* invalidate any existing icache lines (U-mapping) */
if (vma->vm_flags & VM_EXEC)
__inv_icache_page(paddr, vaddr);
}
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0365cbbc9179..688dc7b0d951 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -76,6 +76,8 @@ config ARM
select IRQ_FORCED_THREADING
select MODULES_USE_ELF_REL
select NO_BOOTMEM
+ select OF_EARLY_FLATTREE if OF
+ select OF_RESERVED_MEM if OF
select OLD_SIGACTION
select OLD_SIGSUSPEND3
select PERF_USE_VMALLOC
@@ -609,6 +611,7 @@ config ARCH_PXA
select AUTO_ZRELADDR
select COMMON_CLK
select CLKDEV_LOOKUP
+ select CLKSRC_PXA
select CLKSRC_MMIO
select CLKSRC_OF
select GENERIC_CLOCKEVENTS
@@ -648,6 +651,8 @@ config ARCH_SA1100
select ARCH_SPARSEMEM_ENABLE
select CLKDEV_LOOKUP
select CLKSRC_MMIO
+ select CLKSRC_PXA
+ select CLKSRC_OF if OF
select CPU_FREQ
select CPU_SA1100
select GENERIC_CLOCKEVENTS
@@ -1822,8 +1827,6 @@ config USE_OF
bool "Flattened Device Tree support"
select IRQ_DOMAIN
select OF
- select OF_EARLY_FLATTREE
- select OF_RESERVED_MEM
help
Include support for flattened device tree machine descriptions.
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index d83ff9c9701e..de8791a4d131 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -74,7 +74,7 @@
reg = <0x48240200 0x100>;
interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&dpll_mpu_m2_ck>;
+ clocks = <&mpu_periphclk>;
};
local_timer: timer@48240600 {
@@ -82,7 +82,7 @@
reg = <0x48240600 0x100>;
interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&dpll_mpu_m2_ck>;
+ clocks = <&mpu_periphclk>;
};
l2-cache-controller@48242000 {
diff --git a/arch/arm/boot/dts/am43xx-clocks.dtsi b/arch/arm/boot/dts/am43xx-clocks.dtsi
index cc88728d751d..a38af2bfbfcf 100644
--- a/arch/arm/boot/dts/am43xx-clocks.dtsi
+++ b/arch/arm/boot/dts/am43xx-clocks.dtsi
@@ -259,6 +259,14 @@
ti,invert-autoidle-bit;
};
+ mpu_periphclk: mpu_periphclk {
+ #clock-cells = <0>;
+ compatible = "fixed-factor-clock";
+ clocks = <&dpll_mpu_m2_ck>;
+ clock-mult = <1>;
+ clock-div = <2>;
+ };
+
dpll_ddr_ck: dpll_ddr_ck {
#clock-cells = <0>;
compatible = "ti,am3-dpll-clock";
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index d9ba6b879fc1..00352e761b8c 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts
@@ -604,6 +604,7 @@
reg = <0x6f>;
interrupts-extended = <&crossbar_mpu GIC_SPI 2 IRQ_TYPE_EDGE_RISING>,
<&dra7_pmx_core 0x424>;
+ interrupt-names = "irq", "wakeup";
pinctrl-names = "default";
pinctrl-0 = <&mcp79410_pins_default>;
diff --git a/arch/arm/boot/dts/animeo_ip.dts b/arch/arm/boot/dts/animeo_ip.dts
index 4e0ad3b82796..0962f2fa3f6e 100644
--- a/arch/arm/boot/dts/animeo_ip.dts
+++ b/arch/arm/boot/dts/animeo_ip.dts
@@ -155,21 +155,21 @@
label = "keyswitch_in";
gpios = <&pioB 1 GPIO_ACTIVE_HIGH>;
linux,code = <28>;
- gpio-key,wakeup;
+ wakeup-source;
};
error_in {
label = "error_in";
gpios = <&pioB 2 GPIO_ACTIVE_HIGH>;
linux,code = <29>;
- gpio-key,wakeup;
+ wakeup-source;
};
btn {
label = "btn";
gpios = <&pioC 23 GPIO_ACTIVE_HIGH>;
linux,code = <31>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
};
diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi
index c6a0e9d7f1a9..e8b7f6726772 100644
--- a/arch/arm/boot/dts/armada-38x.dtsi
+++ b/arch/arm/boot/dts/armada-38x.dtsi
@@ -498,6 +498,7 @@
reg = <0x70000 0x4000>;
interrupts-extended = <&mpic 8>;
clocks = <&gateclk 4>;
+ tx-csum-limit = <9800>;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/at91-foxg20.dts b/arch/arm/boot/dts/at91-foxg20.dts
index f89598af4c2b..6bf873e7d96c 100644
--- a/arch/arm/boot/dts/at91-foxg20.dts
+++ b/arch/arm/boot/dts/at91-foxg20.dts
@@ -159,7 +159,7 @@
label = "Button";
gpios = <&pioC 4 GPIO_ACTIVE_LOW>;
linux,code = <0x103>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
};
diff --git a/arch/arm/boot/dts/at91-kizbox.dts b/arch/arm/boot/dts/at91-kizbox.dts
index bf18ece0c027..229e989eb60d 100644
--- a/arch/arm/boot/dts/at91-kizbox.dts
+++ b/arch/arm/boot/dts/at91-kizbox.dts
@@ -24,15 +24,6 @@
};
clocks {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- main_clock: clock@0 {
- compatible = "atmel,osc", "fixed-clock";
- clock-frequency = <18432000>;
- };
-
main_xtal {
clock-frequency = <18432000>;
};
@@ -94,14 +85,14 @@
label = "PB_RST";
gpios = <&pioB 30 GPIO_ACTIVE_HIGH>;
linux,code = <0x100>;
- gpio-key,wakeup;
+ wakeup-source;
};
user {
label = "PB_USER";
gpios = <&pioB 31 GPIO_ACTIVE_HIGH>;
linux,code = <0x101>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
diff --git a/arch/arm/boot/dts/at91-kizbox2.dts b/arch/arm/boot/dts/at91-kizbox2.dts
index f0b1563cb3f1..50a14568f094 100644
--- a/arch/arm/boot/dts/at91-kizbox2.dts
+++ b/arch/arm/boot/dts/at91-kizbox2.dts
@@ -171,21 +171,21 @@
label = "PB_PROG";
gpios = <&pioE 27 GPIO_ACTIVE_LOW>;
linux,code = <0x102>;
- gpio-key,wakeup;
+ wakeup-source;
};
reset {
label = "PB_RST";
gpios = <&pioE 29 GPIO_ACTIVE_LOW>;
linux,code = <0x100>;
- gpio-key,wakeup;
+ wakeup-source;
};
user {
label = "PB_USER";
gpios = <&pioE 31 GPIO_ACTIVE_HIGH>;
linux,code = <0x101>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
diff --git a/arch/arm/boot/dts/at91-kizboxmini.dts b/arch/arm/boot/dts/at91-kizboxmini.dts
index 9f72b4932634..9682d105d4d8 100644
--- a/arch/arm/boot/dts/at91-kizboxmini.dts
+++ b/arch/arm/boot/dts/at91-kizboxmini.dts
@@ -98,14 +98,14 @@
label = "PB_PROG";
gpios = <&pioC 17 GPIO_ACTIVE_LOW>;
linux,code = <0x102>;
- gpio-key,wakeup;
+ wakeup-source;
};
reset {
label = "PB_RST";
gpios = <&pioC 16 GPIO_ACTIVE_LOW>;
linux,code = <0x100>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
diff --git a/arch/arm/boot/dts/at91-qil_a9260.dts b/arch/arm/boot/dts/at91-qil_a9260.dts
index a9aef53ab764..4f2eebf4a560 100644
--- a/arch/arm/boot/dts/at91-qil_a9260.dts
+++ b/arch/arm/boot/dts/at91-qil_a9260.dts
@@ -183,7 +183,7 @@
label = "user_pb";
gpios = <&pioB 10 GPIO_ACTIVE_LOW>;
linux,code = <28>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
diff --git a/arch/arm/boot/dts/at91-sama5d2_xplained.dts b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
index e07c2b206beb..e74df327cdd3 100644
--- a/arch/arm/boot/dts/at91-sama5d2_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
@@ -45,6 +45,7 @@
/dts-v1/;
#include "sama5d2.dtsi"
#include "sama5d2-pinfunc.h"
+#include <dt-bindings/mfd/atmel-flexcom.h>
/ {
model = "Atmel SAMA5D2 Xplained";
@@ -59,15 +60,6 @@
};
clocks {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- main_clock: clock@0 {
- compatible = "atmel,osc", "fixed-clock";
- clock-frequency = <12000000>;
- };
-
slow_xtal {
clock-frequency = <32768>;
};
@@ -91,6 +83,22 @@
status = "okay";
};
+ sdmmc0: sdio-host@a0000000 {
+ bus-width = <8>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_sdmmc0_default>;
+ non-removable;
+ mmc-ddr-1_8v;
+ status = "okay";
+ };
+
+ sdmmc1: sdio-host@b0000000 {
+ bus-width = <4>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_sdmmc1_default>;
+ status = "okay"; /* conflict with qspi0 */
+ };
+
apb {
spi0: spi@f8000000 {
pinctrl-names = "default";
@@ -176,17 +184,55 @@
regulator-name = "VDD_SDHC_1V8";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
+ regulator-always-on;
};
};
};
};
+ flx0: flexcom@f8034000 {
+ atmel,flexcom-mode = <ATMEL_FLEXCOM_MODE_USART>;
+ status = "disabled"; /* conflict with ISC_D2 & ISC_D3 data pins */
+
+ uart5: serial@200 {
+ compatible = "atmel,at91sam9260-usart";
+ reg = <0x200 0x200>;
+ interrupts = <19 IRQ_TYPE_LEVEL_HIGH 7>;
+ clocks = <&flx0_clk>;
+ clock-names = "usart";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_flx0_default>;
+ atmel,fifo-size = <32>;
+ status = "okay";
+ };
+ };
+
uart3: serial@fc008000 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_uart3_default>;
status = "okay";
};
+ flx4: flexcom@fc018000 {
+ atmel,flexcom-mode = <ATMEL_FLEXCOM_MODE_TWI>;
+ status = "okay";
+
+ i2c2: i2c@600 {
+ compatible = "atmel,sama5d2-i2c";
+ reg = <0x600 0x200>;
+ interrupts = <23 IRQ_TYPE_LEVEL_HIGH 7>;
+ dmas = <0>, <0>;
+ dma-names = "tx", "rx";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&flx4_clk>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_flx4_default>;
+ atmel,fifo-size = <16>;
+ status = "okay";
+ };
+ };
+
i2c1: i2c@fc028000 {
dmas = <0>, <0>;
pinctrl-names = "default";
@@ -201,6 +247,18 @@
};
pinctrl@fc038000 {
+ pinctrl_flx0_default: flx0_default {
+ pinmux = <PIN_PB28__FLEXCOM0_IO0>,
+ <PIN_PB29__FLEXCOM0_IO1>;
+ bias-disable;
+ };
+
+ pinctrl_flx4_default: flx4_default {
+ pinmux = <PIN_PD12__FLEXCOM4_IO0>,
+ <PIN_PD13__FLEXCOM4_IO1>;
+ bias-disable;
+ };
+
pinctrl_i2c0_default: i2c0_default {
pinmux = <PIN_PD21__TWD0>,
<PIN_PD22__TWCK0>;
@@ -227,6 +285,46 @@
bias-disable;
};
+ pinctrl_sdmmc0_default: sdmmc0_default {
+ cmd_data {
+ pinmux = <PIN_PA1__SDMMC0_CMD>,
+ <PIN_PA2__SDMMC0_DAT0>,
+ <PIN_PA3__SDMMC0_DAT1>,
+ <PIN_PA4__SDMMC0_DAT2>,
+ <PIN_PA5__SDMMC0_DAT3>,
+ <PIN_PA6__SDMMC0_DAT4>,
+ <PIN_PA7__SDMMC0_DAT5>,
+ <PIN_PA8__SDMMC0_DAT6>,
+ <PIN_PA9__SDMMC0_DAT7>;
+ bias-pull-up;
+ };
+
+ ck_cd_rstn_vddsel {
+ pinmux = <PIN_PA0__SDMMC0_CK>,
+ <PIN_PA10__SDMMC0_RSTN>,
+ <PIN_PA11__SDMMC0_VDDSEL>,
+ <PIN_PA13__SDMMC0_CD>;
+ bias-disable;
+ };
+ };
+
+ pinctrl_sdmmc1_default: sdmmc1_default {
+ cmd_data {
+ pinmux = <PIN_PA28__SDMMC1_CMD>,
+ <PIN_PA18__SDMMC1_DAT0>,
+ <PIN_PA19__SDMMC1_DAT1>,
+ <PIN_PA20__SDMMC1_DAT2>,
+ <PIN_PA21__SDMMC1_DAT3>;
+ bias-pull-up;
+ };
+
+ conf-ck_cd {
+ pinmux = <PIN_PA22__SDMMC1_CK>,
+ <PIN_PA30__SDMMC1_CD>;
+ bias-disable;
+ };
+ };
+
pinctrl_spi0_default: spi0_default {
pinmux = <PIN_PA14__SPI0_SPCK>,
<PIN_PA15__SPI0_MOSI>,
diff --git a/arch/arm/boot/dts/at91-sama5d3_xplained.dts b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
index 8488ac53d22d..ff888d21c786 100644
--- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
@@ -315,7 +315,7 @@
label = "PB_USER";
gpios = <&pioE 29 GPIO_ACTIVE_LOW>;
linux,code = <0x104>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
index 45371a1b61b3..131614f28e75 100644
--- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
@@ -50,7 +50,6 @@
compatible = "atmel,sama5d4-xplained", "atmel,sama5d4", "atmel,sama5";
chosen {
- bootargs = "ignore_loglevel earlyprintk";
stdout-path = "serial0:115200n8";
};
@@ -59,15 +58,6 @@
};
clocks {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- main_clock: clock@0 {
- compatible = "atmel,osc", "fixed-clock";
- clock-frequency = <12000000>;
- };
-
slow_xtal {
clock-frequency = <32768>;
};
@@ -235,7 +225,7 @@
label = "pb_user1";
gpios = <&pioE 8 GPIO_ACTIVE_HIGH>;
linux,code = <0x100>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
diff --git a/arch/arm/boot/dts/at91-sama5d4ek.dts b/arch/arm/boot/dts/at91-sama5d4ek.dts
index 6d272c0125e3..2d4a33100af6 100644
--- a/arch/arm/boot/dts/at91-sama5d4ek.dts
+++ b/arch/arm/boot/dts/at91-sama5d4ek.dts
@@ -50,7 +50,6 @@
compatible = "atmel,sama5d4ek", "atmel,sama5d4", "atmel,sama5";
chosen {
- bootargs = "ignore_loglevel earlyprintk";
stdout-path = "serial0:115200n8";
};
@@ -59,15 +58,6 @@
};
clocks {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- main_clock: clock@0 {
- compatible = "atmel,osc", "fixed-clock";
- clock-frequency = <12000000>;
- };
-
slow_xtal {
clock-frequency = <32768>;
};
@@ -304,7 +294,7 @@
label = "pb_user1";
gpios = <&pioE 13 GPIO_ACTIVE_HIGH>;
linux,code = <0x100>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
diff --git a/arch/arm/boot/dts/at91rm9200ek.dts b/arch/arm/boot/dts/at91rm9200ek.dts
index 8dab4b75ca97..f90e1c2d3caa 100644
--- a/arch/arm/boot/dts/at91rm9200ek.dts
+++ b/arch/arm/boot/dts/at91rm9200ek.dts
@@ -21,15 +21,6 @@
};
clocks {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- main_clock: clock@0 {
- compatible = "atmel,osc", "fixed-clock";
- clock-frequency = <18432000>;
- };
-
slow_xtal {
clock-frequency = <32768>;
};
diff --git a/arch/arm/boot/dts/at91sam9261ek.dts b/arch/arm/boot/dts/at91sam9261ek.dts
index 2e92ac020f23..55bd51f07fa6 100644
--- a/arch/arm/boot/dts/at91sam9261ek.dts
+++ b/arch/arm/boot/dts/at91sam9261ek.dts
@@ -22,15 +22,6 @@
};
clocks {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- main_clock: clock@0 {
- compatible = "atmel,osc", "fixed-clock";
- clock-frequency = <18432000>;
- };
-
slow_xtal {
clock-frequency = <32768>;
};
@@ -149,7 +140,7 @@
ti,debounce-tol = /bits/ 16 <65535>;
ti,debounce-max = /bits/ 16 <1>;
- linux,wakeup;
+ wakeup-source;
};
};
@@ -193,28 +184,28 @@
label = "button_0";
gpios = <&pioA 27 GPIO_ACTIVE_LOW>;
linux,code = <256>;
- gpio-key,wakeup;
+ wakeup-source;
};
button_1 {
label = "button_1";
gpios = <&pioA 26 GPIO_ACTIVE_LOW>;
linux,code = <257>;
- gpio-key,wakeup;
+ wakeup-source;
};
button_2 {
label = "button_2";
gpios = <&pioA 25 GPIO_ACTIVE_LOW>;
linux,code = <258>;
- gpio-key,wakeup;
+ wakeup-source;
};
button_3 {
label = "button_3";
gpios = <&pioA 24 GPIO_ACTIVE_LOW>;
linux,code = <259>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
};
diff --git a/arch/arm/boot/dts/at91sam9263ek.dts b/arch/arm/boot/dts/at91sam9263ek.dts
index 23381276ffb8..59df9d73d276 100644
--- a/arch/arm/boot/dts/at91sam9263ek.dts
+++ b/arch/arm/boot/dts/at91sam9263ek.dts
@@ -22,15 +22,6 @@
};
clocks {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- main_clock: clock@0 {
- compatible = "atmel,osc", "fixed-clock";
- clock-frequency = <16367660>;
- };
-
slow_xtal {
clock-frequency = <32768>;
};
@@ -213,14 +204,14 @@
label = "left_click";
gpios = <&pioC 5 GPIO_ACTIVE_LOW>;
linux,code = <272>;
- gpio-key,wakeup;
+ wakeup-source;
};
right_click {
label = "right_click";
gpios = <&pioC 4 GPIO_ACTIVE_LOW>;
linux,code = <273>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
index 57548a2c5a1e..e9cc99b6353a 100644
--- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
+++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
@@ -19,15 +19,6 @@
};
clocks {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- main_clock: clock@0 {
- compatible = "atmel,osc", "fixed-clock";
- clock-frequency = <18432000>;
- };
-
slow_xtal {
clock-frequency = <32768>;
};
@@ -206,14 +197,14 @@
label = "Button 3";
gpios = <&pioA 30 GPIO_ACTIVE_LOW>;
linux,code = <0x103>;
- gpio-key,wakeup;
+ wakeup-source;
};
btn4 {
label = "Button 4";
gpios = <&pioA 31 GPIO_ACTIVE_LOW>;
linux,code = <0x104>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
diff --git a/arch/arm/boot/dts/at91sam9m10g45ek.dts b/arch/arm/boot/dts/at91sam9m10g45ek.dts
index 9d16ef8453c5..2400c99134f7 100644
--- a/arch/arm/boot/dts/at91sam9m10g45ek.dts
+++ b/arch/arm/boot/dts/at91sam9m10g45ek.dts
@@ -24,15 +24,6 @@
};
clocks {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- main_clock: clock@0 {
- compatible = "atmel,osc", "fixed-clock";
- clock-frequency = <12000000>;
- };
-
slow_xtal {
clock-frequency = <32768>;
};
@@ -323,14 +314,14 @@
label = "left_click";
gpios = <&pioB 6 GPIO_ACTIVE_LOW>;
linux,code = <272>;
- gpio-key,wakeup;
+ wakeup-source;
};
right_click {
label = "right_click";
gpios = <&pioB 7 GPIO_ACTIVE_LOW>;
linux,code = <273>;
- gpio-key,wakeup;
+ wakeup-source;
};
left {
diff --git a/arch/arm/boot/dts/at91sam9n12ek.dts b/arch/arm/boot/dts/at91sam9n12ek.dts
index acf3451a332d..ca4ddf86817a 100644
--- a/arch/arm/boot/dts/at91sam9n12ek.dts
+++ b/arch/arm/boot/dts/at91sam9n12ek.dts
@@ -23,15 +23,6 @@
};
clocks {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- main_clock: clock@0 {
- compatible = "atmel,osc", "fixed-clock";
- clock-frequency = <16000000>;
- };
-
slow_xtal {
clock-frequency = <32768>;
};
@@ -219,7 +210,7 @@
label = "Enter";
gpios = <&pioB 3 GPIO_ACTIVE_LOW>;
linux,code = <28>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
diff --git a/arch/arm/boot/dts/at91sam9rlek.dts b/arch/arm/boot/dts/at91sam9rlek.dts
index 558c9f220bed..f10566f759cd 100644
--- a/arch/arm/boot/dts/at91sam9rlek.dts
+++ b/arch/arm/boot/dts/at91sam9rlek.dts
@@ -22,15 +22,6 @@
};
clocks {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- main_clock: clock {
- compatible = "atmel,osc", "fixed-clock";
- clock-frequency = <12000000>;
- };
-
slow_xtal {
clock-frequency = <32768>;
};
@@ -225,14 +216,14 @@
label = "right_click";
gpios = <&pioB 0 GPIO_ACTIVE_LOW>;
linux,code = <273>;
- gpio-key,wakeup;
+ wakeup-source;
};
left_click {
label = "left_click";
gpios = <&pioB 1 GPIO_ACTIVE_LOW>;
linux,code = <272>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
diff --git a/arch/arm/boot/dts/at91sam9x5cm.dtsi b/arch/arm/boot/dts/at91sam9x5cm.dtsi
index 26112ebd15fc..b098ad8cd93a 100644
--- a/arch/arm/boot/dts/at91sam9x5cm.dtsi
+++ b/arch/arm/boot/dts/at91sam9x5cm.dtsi
@@ -13,17 +13,6 @@
};
clocks {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- main_clock: clock@0 {
- compatible = "atmel,osc", "fixed-clock";
- clock-frequency = <12000000>;
- };
- };
-
- clocks {
slow_xtal {
clock-frequency = <32768>;
};
diff --git a/arch/arm/boot/dts/berlin2q.dtsi b/arch/arm/boot/dts/berlin2q.dtsi
index 8ea177f375dd..fb1da99996ea 100644
--- a/arch/arm/boot/dts/berlin2q.dtsi
+++ b/arch/arm/boot/dts/berlin2q.dtsi
@@ -118,7 +118,8 @@
sdhci0: sdhci@ab0000 {
compatible = "mrvl,pxav3-mmc";
reg = <0xab0000 0x200>;
- clocks = <&chip_clk CLKID_SDIO1XIN>;
+ clocks = <&chip_clk CLKID_SDIO1XIN>, <&chip_clk CLKID_SDIO>;
+ clock-names = "io", "core";
interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
@@ -126,7 +127,8 @@
sdhci1: sdhci@ab0800 {
compatible = "mrvl,pxav3-mmc";
reg = <0xab0800 0x200>;
- clocks = <&chip_clk CLKID_SDIO1XIN>;
+ clocks = <&chip_clk CLKID_SDIO1XIN>, <&chip_clk CLKID_SDIO>;
+ clock-names = "io", "core";
interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
};
@@ -135,7 +137,7 @@
compatible = "mrvl,pxav3-mmc";
reg = <0xab1000 0x200>;
interrupts = <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&chip_clk CLKID_NFC_ECC>, <&chip_clk CLKID_NFC>;
+ clocks = <&chip_clk CLKID_NFC_ECC>, <&chip_clk CLKID_SDIO>;
clock-names = "io", "core";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi
index 3c99cfa1a876..eee636de4cd8 100644
--- a/arch/arm/boot/dts/dm816x.dtsi
+++ b/arch/arm/boot/dts/dm816x.dtsi
@@ -218,6 +218,7 @@
reg = <0x480c8000 0x2000>;
interrupts = <77>;
ti,hwmods = "mailbox";
+ #mbox-cells = <1>;
ti,mbox-num-users = <4>;
ti,mbox-num-fifos = <12>;
mbox_dsp: mbox_dsp {
@@ -279,8 +280,11 @@
ti,spi-num-cs = <4>;
ti,hwmods = "mcspi1";
dmas = <&edma 16 &edma 17
- &edma 18 &edma 19>;
- dma-names = "tx0", "rx0", "tx1", "rx1";
+ &edma 18 &edma 19
+ &edma 20 &edma 21
+ &edma 22 &edma 23>;
+ dma-names = "tx0", "rx0", "tx1", "rx1",
+ "tx2", "rx2", "tx3", "rx3";
};
mmc1: mmc@48060000 {
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index bc672fb91466..fe99231cbde5 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -1459,8 +1459,8 @@
interrupt-names = "tx", "rx";
dmas = <&sdma_xbar 133>, <&sdma_xbar 132>;
dma-names = "tx", "rx";
- clocks = <&mcasp3_ahclkx_mux>;
- clock-names = "fck";
+ clocks = <&mcasp3_aux_gfclk_mux>, <&mcasp3_ahclkx_mux>;
+ clock-names = "fck", "ahclkx";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi
index feb9d34b239c..f818ea483aeb 100644
--- a/arch/arm/boot/dts/imx27.dtsi
+++ b/arch/arm/boot/dts/imx27.dtsi
@@ -486,7 +486,10 @@
compatible = "fsl,imx27-usb";
reg = <0x10024000 0x200>;
interrupts = <56>;
- clocks = <&clks IMX27_CLK_USB_IPG_GATE>;
+ clocks = <&clks IMX27_CLK_USB_IPG_GATE>,
+ <&clks IMX27_CLK_USB_AHB_GATE>,
+ <&clks IMX27_CLK_USB_DIV>;
+ clock-names = "ipg", "ahb", "per";
fsl,usbmisc = <&usbmisc 0>;
status = "disabled";
};
@@ -495,7 +498,10 @@
compatible = "fsl,imx27-usb";
reg = <0x10024200 0x200>;
interrupts = <54>;
- clocks = <&clks IMX27_CLK_USB_IPG_GATE>;
+ clocks = <&clks IMX27_CLK_USB_IPG_GATE>,
+ <&clks IMX27_CLK_USB_AHB_GATE>,
+ <&clks IMX27_CLK_USB_DIV>;
+ clock-names = "ipg", "ahb", "per";
fsl,usbmisc = <&usbmisc 1>;
dr_mode = "host";
status = "disabled";
@@ -505,7 +511,10 @@
compatible = "fsl,imx27-usb";
reg = <0x10024400 0x200>;
interrupts = <55>;
- clocks = <&clks IMX27_CLK_USB_IPG_GATE>;
+ clocks = <&clks IMX27_CLK_USB_IPG_GATE>,
+ <&clks IMX27_CLK_USB_AHB_GATE>,
+ <&clks IMX27_CLK_USB_DIV>;
+ clock-names = "ipg", "ahb", "per";
fsl,usbmisc = <&usbmisc 2>;
dr_mode = "host";
status = "disabled";
@@ -515,7 +524,6 @@
#index-cells = <1>;
compatible = "fsl,imx27-usbmisc";
reg = <0x10024600 0x200>;
- clocks = <&clks IMX27_CLK_USB_AHB_GATE>;
};
sahara2: sahara@10025000 {
diff --git a/arch/arm/boot/dts/imx6q-gw5400-a.dts b/arch/arm/boot/dts/imx6q-gw5400-a.dts
index 58adf176425a..a51834e1dd27 100644
--- a/arch/arm/boot/dts/imx6q-gw5400-a.dts
+++ b/arch/arm/boot/dts/imx6q-gw5400-a.dts
@@ -154,7 +154,7 @@
&fec {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enet>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
phy-reset-gpios = <&gpio1 30 GPIO_ACTIVE_HIGH>;
status = "okay";
};
diff --git a/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi
index 7b31fdb79ced..dc0cebfe22d7 100644
--- a/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi
@@ -94,7 +94,7 @@
&fec {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enet>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
phy-reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>;
status = "okay";
};
diff --git a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi
index 1b66328a8498..18cd4114a23e 100644
--- a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi
@@ -154,7 +154,7 @@
&fec {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enet>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
phy-reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>;
status = "okay";
};
diff --git a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi
index 7c51839ff934..eea90f37bbb8 100644
--- a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi
@@ -155,7 +155,7 @@
&fec {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enet>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
phy-reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>;
status = "okay";
};
diff --git a/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi
index 929e0b37bd9e..6c11a2ae35ef 100644
--- a/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-gw54xx.dtsi
@@ -145,7 +145,7 @@
&fec {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enet>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
phy-reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>;
status = "okay";
};
diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
index 8263fc18a7d9..d354d406954d 100644
--- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
@@ -113,14 +113,14 @@
&clks {
assigned-clocks = <&clks IMX6QDL_PLL4_BYPASS_SRC>,
<&clks IMX6QDL_PLL4_BYPASS>,
- <&clks IMX6QDL_CLK_PLL4_POST_DIV>,
<&clks IMX6QDL_CLK_LDB_DI0_SEL>,
- <&clks IMX6QDL_CLK_LDB_DI1_SEL>;
+ <&clks IMX6QDL_CLK_LDB_DI1_SEL>,
+ <&clks IMX6QDL_CLK_PLL4_POST_DIV>;
assigned-clock-parents = <&clks IMX6QDL_CLK_LVDS2_IN>,
<&clks IMX6QDL_PLL4_BYPASS_SRC>,
<&clks IMX6QDL_CLK_PLL3_USB_OTG>,
<&clks IMX6QDL_CLK_PLL3_USB_OTG>;
- assigned-clock-rates = <0>, <0>, <24576000>;
+ assigned-clock-rates = <0>, <0>, <0>, <0>, <24576000>;
};
&ecspi1 {
diff --git a/arch/arm/boot/dts/k2l-netcp.dtsi b/arch/arm/boot/dts/k2l-netcp.dtsi
index 01aef230773d..5acbd0dcc2ab 100644
--- a/arch/arm/boot/dts/k2l-netcp.dtsi
+++ b/arch/arm/boot/dts/k2l-netcp.dtsi
@@ -137,7 +137,7 @@ netcp: netcp@26000000 {
/* NetCP address range */
ranges = <0 0x26000000 0x1000000>;
- clocks = <&papllclk>, <&clkcpgmac>, <&chipclk12>;
+ clocks = <&clkosr>, <&papllclk>, <&clkcpgmac>, <&chipclk12>;
dma-coherent;
ti,navigator-dmas = <&dma_gbe 0>,
diff --git a/arch/arm/boot/dts/kirkwood-ts219.dtsi b/arch/arm/boot/dts/kirkwood-ts219.dtsi
index c56ab6bbfe3c..0e46560551f4 100644
--- a/arch/arm/boot/dts/kirkwood-ts219.dtsi
+++ b/arch/arm/boot/dts/kirkwood-ts219.dtsi
@@ -40,7 +40,7 @@
};
poweroff@12100 {
compatible = "qnap,power-off";
- reg = <0x12000 0x100>;
+ reg = <0x12100 0x100>;
clocks = <&gate_clk 7>;
};
spi@10600 {
diff --git a/arch/arm/boot/dts/omap4-duovero-parlor.dts b/arch/arm/boot/dts/omap4-duovero-parlor.dts
index 1a78f013f37a..b75f7b2b7c4a 100644
--- a/arch/arm/boot/dts/omap4-duovero-parlor.dts
+++ b/arch/arm/boot/dts/omap4-duovero-parlor.dts
@@ -189,3 +189,7 @@
};
};
+&uart3 {
+ interrupts-extended = <&wakeupgen GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH
+ &omap4_pmx_core OMAP4_UART3_RX>;
+};
diff --git a/arch/arm/boot/dts/rk3288-veyron-minnie.dts b/arch/arm/boot/dts/rk3288-veyron-minnie.dts
index 8fd8ef2c72da..85f0373df498 100644
--- a/arch/arm/boot/dts/rk3288-veyron-minnie.dts
+++ b/arch/arm/boot/dts/rk3288-veyron-minnie.dts
@@ -86,6 +86,10 @@
};
};
+&emmc {
+ /delete-property/mmc-hs200-1_8v;
+};
+
&gpio_keys {
pinctrl-0 = <&pwr_key_l &ap_lid_int_l &volum_down_l &volum_up_l>;
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index 6a79c9c526b8..04ea209f1737 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -452,8 +452,10 @@
clock-names = "tsadc", "apb_pclk";
resets = <&cru SRST_TSADC>;
reset-names = "tsadc-apb";
- pinctrl-names = "default";
- pinctrl-0 = <&otp_out>;
+ pinctrl-names = "init", "default", "sleep";
+ pinctrl-0 = <&otp_gpio>;
+ pinctrl-1 = <&otp_out>;
+ pinctrl-2 = <&otp_gpio>;
#thermal-sensor-cells = <1>;
rockchip,hw-tshut-temp = <95000>;
status = "disabled";
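The three pinctrl states read as follows (an interpretation of the pinctrl core's standard state names, not spelled out in this hunk): the OTP pin is parked as a GPIO ("init") from driver bind until probe finishes configuring the TSADC, only then muxed to its thermal-shutdown function ("default"), and returned to GPIO for suspend ("sleep"), so an unconfigured sensor cannot glitch the TSHUT line.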
@@ -1395,6 +1397,10 @@
};
tsadc {
+ otp_gpio: otp-gpio {
+ rockchip,pins = <0 10 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
otp_out: otp-out {
rockchip,pins = <0 10 RK_FUNC_1 &pcfg_pull_none>;
};
diff --git a/arch/arm/boot/dts/sama5d35ek.dts b/arch/arm/boot/dts/sama5d35ek.dts
index d9a9aca1ccfd..e812f5c1bf70 100644
--- a/arch/arm/boot/dts/sama5d35ek.dts
+++ b/arch/arm/boot/dts/sama5d35ek.dts
@@ -49,7 +49,7 @@
label = "pb_user1";
gpios = <&pioE 27 GPIO_ACTIVE_HIGH>;
linux,code = <0x100>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
};
diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi
index 15bbaf690047..2193637b9cd2 100644
--- a/arch/arm/boot/dts/sama5d4.dtsi
+++ b/arch/arm/boot/dts/sama5d4.dtsi
@@ -1300,7 +1300,7 @@
};
watchdog@fc068640 {
- compatible = "atmel,at91sam9260-wdt";
+ compatible = "atmel,sama5d4-wdt";
reg = <0xfc068640 0x10>;
clocks = <&clk32k>;
status = "disabled";
diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
index 314f59c12162..d0c743853318 100644
--- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
+++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
@@ -25,9 +25,9 @@
cache-sets = <512>;
cache-line-size = <32>;
/* At full speed latency must be >=2 */
- arm,tag-latency = <2>;
- arm,data-latency = <2 2>;
- arm,dirty-latency = <2>;
+ arm,tag-latency = <8>;
+ arm,data-latency = <8 8>;
+ arm,dirty-latency = <8>;
};
mtu0: mtu@101e2000 {
diff --git a/arch/arm/boot/dts/sun6i-a31s-primo81.dts b/arch/arm/boot/dts/sun6i-a31s-primo81.dts
index 2d4250b1faf8..68b479b8772c 100644
--- a/arch/arm/boot/dts/sun6i-a31s-primo81.dts
+++ b/arch/arm/boot/dts/sun6i-a31s-primo81.dts
@@ -83,6 +83,7 @@
reg = <0x5d>;
interrupt-parent = <&pio>;
interrupts = <0 3 IRQ_TYPE_LEVEL_HIGH>; /* PA3 */
+ touchscreen-swapped-x-y;
};
};
diff --git a/arch/arm/boot/dts/tegra124-nyan.dtsi b/arch/arm/boot/dts/tegra124-nyan.dtsi
index 40c23a0b7cfc..ec1aa64ded68 100644
--- a/arch/arm/boot/dts/tegra124-nyan.dtsi
+++ b/arch/arm/boot/dts/tegra124-nyan.dtsi
@@ -399,7 +399,7 @@
/* CPU DFLL clock */
clock@0,70110000 {
- status = "okay";
+ status = "disabled";
vdd-cpu-supply = <&vdd_cpu>;
nvidia,i2c-fs-rate = <400000>;
};
diff --git a/arch/arm/boot/dts/usb_a9260_common.dtsi b/arch/arm/boot/dts/usb_a9260_common.dtsi
index 12edafefd44a..9beea8976584 100644
--- a/arch/arm/boot/dts/usb_a9260_common.dtsi
+++ b/arch/arm/boot/dts/usb_a9260_common.dtsi
@@ -115,7 +115,7 @@
label = "user_pb";
gpios = <&pioB 10 GPIO_ACTIVE_LOW>;
linux,code = <28>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
diff --git a/arch/arm/boot/dts/usb_a9263.dts b/arch/arm/boot/dts/usb_a9263.dts
index 68c0de36c339..8cc6edb29694 100644
--- a/arch/arm/boot/dts/usb_a9263.dts
+++ b/arch/arm/boot/dts/usb_a9263.dts
@@ -143,7 +143,7 @@
label = "user_pb";
gpios = <&pioB 10 GPIO_ACTIVE_LOW>;
linux,code = <28>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
diff --git a/arch/arm/boot/dts/versatile-ab.dts b/arch/arm/boot/dts/versatile-ab.dts
index 01f40197ea13..3279bf1a17a1 100644
--- a/arch/arm/boot/dts/versatile-ab.dts
+++ b/arch/arm/boot/dts/versatile-ab.dts
@@ -110,7 +110,11 @@
interrupt-parent = <&vic>;
interrupts = <31>; /* Cascaded to vic */
clear-mask = <0xffffffff>;
- valid-mask = <0xffc203f8>;
+ /*
+ * Valid interrupt lines mask according to
+ * table 4-36 page 4-50 of ARM DUI 0225D
+ */
+ valid-mask = <0x0760031b>;
};
dma@10130000 {
@@ -266,8 +270,8 @@
};
mmc@5000 {
compatible = "arm,pl180", "arm,primecell";
- reg = < 0x5000 0x1000>;
- interrupts-extended = <&vic 22 &sic 2>;
+ reg = <0x5000 0x1000>;
+ interrupts-extended = <&vic 22 &sic 1>;
clocks = <&xtal24mhz>, <&pclk>;
clock-names = "mclk", "apb_pclk";
};
diff --git a/arch/arm/boot/dts/versatile-pb.dts b/arch/arm/boot/dts/versatile-pb.dts
index b83137f66034..33a8eb28374e 100644
--- a/arch/arm/boot/dts/versatile-pb.dts
+++ b/arch/arm/boot/dts/versatile-pb.dts
@@ -5,6 +5,16 @@
compatible = "arm,versatile-pb";
amba {
+		/* The Versatile PB uses more SIC IRQ lines than the AB */
+ sic: intc@10003000 {
+ clear-mask = <0xffffffff>;
+ /*
+ * Valid interrupt lines mask according to
+ * figure 3-30 page 3-74 of ARM DUI 0224B
+ */
+ valid-mask = <0x7fe003ff>;
+ };
+
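Decoded, valid-mask = <0x7fe003ff> marks SIC inputs 0-9 (0x3ff) and 21-30 (0x7fe00000) as usable, the wider set of FPGA interrupt lines the PB wires up compared to the AB value it overrides.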
gpio2: gpio@101e6000 {
compatible = "arm,pl061", "arm,primecell";
reg = <0x101e6000 0x1000>;
@@ -67,6 +77,13 @@
};
fpga {
+ mmc@5000 {
+ /*
+ * Overrides the interrupt assignment from
+ * the Versatile AB board file.
+ */
+ interrupts-extended = <&sic 22 &sic 23>;
+ };
uart@9000 {
compatible = "arm,pl011", "arm,primecell";
reg = <0x9000 0x1000>;
@@ -86,7 +103,8 @@
mmc@b000 {
compatible = "arm,pl180", "arm,primecell";
reg = <0xb000 0x1000>;
- interrupts-extended = <&vic 23 &sic 2>;
+ interrupt-parent = <&sic>;
+ interrupts = <1>, <2>;
clocks = <&xtal24mhz>, <&pclk>;
clock-names = "mclk", "apb_pclk";
};
diff --git a/arch/arm/boot/dts/vf610-colibri.dtsi b/arch/arm/boot/dts/vf610-colibri.dtsi
index 19fe045b8334..2d7eab755210 100644
--- a/arch/arm/boot/dts/vf610-colibri.dtsi
+++ b/arch/arm/boot/dts/vf610-colibri.dtsi
@@ -18,8 +18,3 @@
reg = <0x80000000 0x10000000>;
};
};
-
-&L2 {
- arm,data-latency = <2 1 2>;
- arm,tag-latency = <3 2 3>;
-};
diff --git a/arch/arm/boot/dts/vf610.dtsi b/arch/arm/boot/dts/vf610.dtsi
index 5f8eb1bd782b..58bc6e448be5 100644
--- a/arch/arm/boot/dts/vf610.dtsi
+++ b/arch/arm/boot/dts/vf610.dtsi
@@ -19,7 +19,7 @@
reg = <0x40006000 0x1000>;
cache-unified;
cache-level = <2>;
- arm,data-latency = <1 1 1>;
+ arm,data-latency = <3 3 3>;
arm,tag-latency = <2 2 2>;
};
};
diff --git a/arch/arm/boot/dts/vfxxx.dtsi b/arch/arm/boot/dts/vfxxx.dtsi
index 6736bae43a5b..3cd1b27f2697 100644
--- a/arch/arm/boot/dts/vfxxx.dtsi
+++ b/arch/arm/boot/dts/vfxxx.dtsi
@@ -158,7 +158,7 @@
interrupts = <67 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks VF610_CLK_DSPI0>;
clock-names = "dspi";
- spi-num-chipselects = <5>;
+ spi-num-chipselects = <6>;
status = "disabled";
};
@@ -170,7 +170,7 @@
interrupts = <68 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks VF610_CLK_DSPI1>;
clock-names = "dspi";
- spi-num-chipselects = <5>;
+ spi-num-chipselects = <4>;
status = "disabled";
};
@@ -178,8 +178,10 @@
compatible = "fsl,vf610-sai";
reg = <0x40031000 0x1000>;
interrupts = <86 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clks VF610_CLK_SAI2>;
- clock-names = "sai";
+ clocks = <&clks VF610_CLK_SAI2>,
+ <&clks VF610_CLK_SAI2_DIV>,
+ <&clks 0>, <&clks 0>;
+ clock-names = "bus", "mclk1", "mclk2", "mclk3";
dma-names = "tx", "rx";
dmas = <&edma0 0 21>,
<&edma0 0 20>;
@@ -461,6 +463,8 @@
clock-names = "adc";
#io-channel-cells = <1>;
status = "disabled";
+ fsl,adck-max-frequency = <30000000>, <40000000>,
+ <20000000>;
};
esdhc0: esdhc@400b1000 {
@@ -472,8 +476,6 @@
<&clks VF610_CLK_ESDHC0>;
clock-names = "ipg", "ahb", "per";
status = "disabled";
- fsl,adck-max-frequency = <30000000>, <40000000>,
- <20000000>;
};
esdhc1: esdhc@400b2000 {
diff --git a/arch/arm/boot/dts/wm8650.dtsi b/arch/arm/boot/dts/wm8650.dtsi
index b1c59a766a13..e12213d16693 100644
--- a/arch/arm/boot/dts/wm8650.dtsi
+++ b/arch/arm/boot/dts/wm8650.dtsi
@@ -187,6 +187,15 @@
interrupts = <43>;
};
+ sdhc@d800a000 {
+ compatible = "wm,wm8505-sdhc";
+ reg = <0xd800a000 0x400>;
+ interrupts = <20>, <21>;
+ clocks = <&clksdhc>;
+ bus-width = <4>;
+ sdon-inverted;
+ };
+
fb: fb@d8050800 {
compatible = "wm,wm8505-fb";
reg = <0xd8050800 0x200>;
diff --git a/arch/arm/configs/at91_dt_defconfig b/arch/arm/configs/at91_dt_defconfig
index 1b1e5acd76e2..e4b1be66b3f5 100644
--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -125,7 +125,6 @@ CONFIG_POWER_RESET=y
# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
CONFIG_AT91SAM9X_WATCHDOG=y
-CONFIG_SSB=m
CONFIG_MFD_ATMEL_HLCDC=y
CONFIG_REGULATOR=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 69a22fdb52a5..cd7b198fc79e 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -366,6 +366,7 @@ CONFIG_BATTERY_MAX17042=m
CONFIG_CHARGER_MAX14577=m
CONFIG_CHARGER_MAX77693=m
CONFIG_CHARGER_TPS65090=y
+CONFIG_AXP20X_POWER=m
CONFIG_POWER_RESET_AS3722=y
CONFIG_POWER_RESET_GPIO=y
CONFIG_POWER_RESET_GPIO_RESTART=y
diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig
index a0c57ac88b27..63f7e6ce649a 100644
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -129,7 +129,6 @@ CONFIG_GPIO_SYSFS=y
CONFIG_POWER_SUPPLY=y
CONFIG_POWER_RESET=y
# CONFIG_HWMON is not set
-CONFIG_SSB=m
CONFIG_MFD_ATMEL_FLEXCOM=y
CONFIG_REGULATOR=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 3c36e16fcacf..b503a89441bf 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -84,6 +84,7 @@ CONFIG_SPI_SUN4I=y
CONFIG_SPI_SUN6I=y
CONFIG_GPIO_SYSFS=y
CONFIG_POWER_SUPPLY=y
+CONFIG_AXP20X_POWER=y
CONFIG_THERMAL=y
CONFIG_CPU_THERMAL=y
CONFIG_WATCHDOG=y
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index 6607d976e07d..7da5503c0591 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -21,6 +21,7 @@
#ifndef __ASSEMBLY__
#include <linux/io.h>
+#include <asm/barrier.h>
#define __ACCESS_CP15(CRn, Op1, CRm, Op2) p15, Op1, %0, CRn, CRm, Op2
#define __ACCESS_CP15_64(Op1, CRm) p15, Op1, %Q0, %R0, CRm
diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h
index be1d07d59ee9..1bd9510de1b9 100644
--- a/arch/arm/include/asm/irq.h
+++ b/arch/arm/include/asm/irq.h
@@ -40,6 +40,11 @@ extern void arch_trigger_all_cpu_backtrace(bool);
#define arch_trigger_all_cpu_backtrace(x) arch_trigger_all_cpu_backtrace(x)
#endif
+static inline int nr_legacy_irqs(void)
+{
+ return NR_IRQS_LEGACY;
+}
+
#endif
#endif
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index a9c80a2ea1a7..3095df091ff8 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -28,6 +28,18 @@
unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu);
+static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu,
+ u8 reg_num)
+{
+ return *vcpu_reg(vcpu, reg_num);
+}
+
+static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
+ unsigned long val)
+{
+ *vcpu_reg(vcpu, reg_num) = val;
+}
+
bool kvm_condition_valid(struct kvm_vcpu *vcpu);
void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr);
void kvm_inject_undefined(struct kvm_vcpu *vcpu);
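The two inline accessors above replace open-coded *vcpu_reg() dereferences at the call sites patched later in this series (mmio.c, psci.c), giving every read and write a single typed entry point. A rough user-space sketch of the accessor pattern, with all names invented for illustration:

#include <stdio.h>

struct cpu_ctx { unsigned long regs[16]; };

/* one lookup point, mirrored by a get/set pair */
static unsigned long *ctx_reg(struct cpu_ctx *c, unsigned int n)
{
        return &c->regs[n & 15];
}

static unsigned long ctx_get_reg(struct cpu_ctx *c, unsigned int n)
{
        return *ctx_reg(c, n);
}

static void ctx_set_reg(struct cpu_ctx *c, unsigned int n, unsigned long v)
{
        *ctx_reg(c, n) = v;
}

int main(void)
{
        struct cpu_ctx c = { { 0 } };

        ctx_set_reg(&c, 0, 0x1234);
        printf("r0 = %#lx\n", ctx_get_reg(&c, 0));
        return 0;
}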
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 8cc85a4ebec2..35c9db857ebe 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -510,10 +510,14 @@ __copy_to_user_std(void __user *to, const void *from, unsigned long n);
static inline unsigned long __must_check
__copy_to_user(void __user *to, const void *from, unsigned long n)
{
+#ifndef CONFIG_UACCESS_WITH_MEMCPY
unsigned int __ua_flags = uaccess_save_and_enable();
n = arm_copy_to_user(to, from, n);
uaccess_restore(__ua_flags);
return n;
+#else
+ return arm_copy_to_user(to, from, n);
+#endif
}
extern unsigned long __must_check
diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index 7a2a32a1d5a8..ede692ffa32e 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -416,6 +416,7 @@
#define __NR_execveat (__NR_SYSCALL_BASE+387)
#define __NR_userfaultfd (__NR_SYSCALL_BASE+388)
#define __NR_membarrier (__NR_SYSCALL_BASE+389)
+#define __NR_mlock2 (__NR_SYSCALL_BASE+390)
/*
* The following SWIs are ARM private.
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 6551d28c27e6..066f7f9ba411 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -17,11 +17,6 @@
#include <asm/mach/pci.h>
static int debug_pci;
-static resource_size_t (*align_resource)(struct pci_dev *dev,
- const struct resource *res,
- resource_size_t start,
- resource_size_t size,
- resource_size_t align) = NULL;
/*
* We can't use pci_get_device() here since we are
@@ -461,7 +456,6 @@ static void pcibios_init_hw(struct device *parent, struct hw_pci *hw,
sys->busnr = busnr;
sys->swizzle = hw->swizzle;
sys->map_irq = hw->map_irq;
- align_resource = hw->align_resource;
INIT_LIST_HEAD(&sys->resources);
if (hw->private_data)
@@ -470,6 +464,8 @@ static void pcibios_init_hw(struct device *parent, struct hw_pci *hw,
ret = hw->setup(nr, sys);
if (ret > 0) {
+ struct pci_host_bridge *host_bridge;
+
ret = pcibios_init_resources(nr, sys);
if (ret) {
kfree(sys);
@@ -491,6 +487,9 @@ static void pcibios_init_hw(struct device *parent, struct hw_pci *hw,
busnr = sys->bus->busn_res.end + 1;
list_add(&sys->node, head);
+
+ host_bridge = pci_find_host_bridge(sys->bus);
+ host_bridge->align_resource = hw->align_resource;
} else {
kfree(sys);
if (ret < 0)
@@ -578,14 +577,18 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
{
struct pci_dev *dev = data;
resource_size_t start = res->start;
+ struct pci_host_bridge *host_bridge;
if (res->flags & IORESOURCE_IO && start & 0x300)
start = (start + 0x3ff) & ~0x3ff;
start = (start + align - 1) & ~(align - 1);
- if (align_resource)
- return align_resource(dev, res, start, size, align);
+ host_bridge = pci_find_host_bridge(dev->bus);
+
+ if (host_bridge->align_resource)
+ return host_bridge->align_resource(dev, res,
+ start, size, align);
return start;
}
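The bios32.c change above retires the file-scope align_resource pointer: the hook now lives in the pci_host_bridge found via pci_find_host_bridge(), so two host controllers can no longer clobber each other's callback. A minimal sketch of the global-versus-per-instance dispatch, with invented names:

#include <stdio.h>

struct bridge {
        const char *name;
        /* per-bridge hook instead of one file-scope pointer */
        unsigned long (*align_resource)(unsigned long start,
                                        unsigned long align);
};

static unsigned long align_up(unsigned long start, unsigned long align)
{
        return (start + align - 1) & ~(align - 1);
}

static unsigned long place(struct bridge *b, unsigned long start,
                           unsigned long align)
{
        start = align_up(start, align);
        if (b->align_resource)
                return b->align_resource(start, align);
        return start;
}

int main(void)
{
        struct bridge b0 = { "pci0", NULL };
        struct bridge b1 = { "pci1", align_up };

        printf("%#lx %#lx\n", place(&b0, 0x101, 0x100),
               place(&b1, 0x101, 0x100));
        return 0;
}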
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index fde6c88d560c..ac368bb068d1 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -399,6 +399,7 @@
CALL(sys_execveat)
CALL(sys_userfaultfd)
CALL(sys_membarrier)
+ CALL(sys_mlock2)
#ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 7a7c4cea5523..4adfb46e3ee9 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -95,6 +95,22 @@ void __show_regs(struct pt_regs *regs)
{
unsigned long flags;
char buf[64];
+#ifndef CONFIG_CPU_V7M
+ unsigned int domain;
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+ /*
+ * Get the domain register for the parent context. In user
+ * mode, we don't save the DACR, so let's use what it should
+ * be. For other modes, we place it after the pt_regs struct.
+ */
+ if (user_mode(regs))
+ domain = DACR_UACCESS_ENABLE;
+ else
+ domain = *(unsigned int *)(regs + 1);
+#else
+ domain = get_domain();
+#endif
+#endif
show_regs_print_info(KERN_DEFAULT);
@@ -123,21 +139,8 @@ void __show_regs(struct pt_regs *regs)
#ifndef CONFIG_CPU_V7M
{
- unsigned int domain = get_domain();
const char *segment;
-#ifdef CONFIG_CPU_SW_DOMAIN_PAN
- /*
- * Get the domain register for the parent context. In user
- * mode, we don't save the DACR, so lets use what it should
- * be. For other modes, we place it after the pt_regs struct.
- */
- if (user_mode(regs))
- domain = DACR_UACCESS_ENABLE;
- else
- domain = *(unsigned int *)(regs + 1);
-#endif
-
if ((domain & domain_mask(DOMAIN_USER)) ==
domain_val(DOMAIN_USER, DOMAIN_NOACCESS))
segment = "none";
@@ -163,11 +166,11 @@ void __show_regs(struct pt_regs *regs)
buf[0] = '\0';
#ifdef CONFIG_CPU_CP15_MMU
{
- unsigned int transbase, dac = get_domain();
+ unsigned int transbase;
asm("mrc p15, 0, %0, c2, c0\n\t"
: "=r" (transbase));
snprintf(buf, sizeof(buf), " Table: %08x DAC: %08x",
- transbase, dac);
+ transbase, domain);
}
#endif
asm("mrc p15, 0, %0, c1, c0\n" : "=r" (ctrl));
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
index 5b26e7efa9ea..c3fe769d7558 100644
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -36,10 +36,10 @@
*/
#define __user_swpX_asm(data, addr, res, temp, B) \
__asm__ __volatile__( \
- " mov %2, %1\n" \
- "0: ldrex"B" %1, [%3]\n" \
- "1: strex"B" %0, %2, [%3]\n" \
+ "0: ldrex"B" %2, [%3]\n" \
+ "1: strex"B" %0, %1, [%3]\n" \
" cmp %0, #0\n" \
+ " moveq %1, %2\n" \
" movne %0, %4\n" \
"2:\n" \
" .section .text.fixup,\"ax\"\n" \
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index b83f3b7737fb..087acb569b63 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -193,15 +193,44 @@ struct oabi_flock64 {
pid_t l_pid;
} __attribute__ ((packed,aligned(4)));
-asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd,
+static long do_locks(unsigned int fd, unsigned int cmd,
unsigned long arg)
{
- struct oabi_flock64 user;
struct flock64 kernel;
- mm_segment_t fs = USER_DS; /* initialized to kill a warning */
- unsigned long local_arg = arg;
- int ret;
+ struct oabi_flock64 user;
+ mm_segment_t fs;
+ long ret;
+
+ if (copy_from_user(&user, (struct oabi_flock64 __user *)arg,
+ sizeof(user)))
+ return -EFAULT;
+ kernel.l_type = user.l_type;
+ kernel.l_whence = user.l_whence;
+ kernel.l_start = user.l_start;
+ kernel.l_len = user.l_len;
+ kernel.l_pid = user.l_pid;
+
+ fs = get_fs();
+ set_fs(KERNEL_DS);
+ ret = sys_fcntl64(fd, cmd, (unsigned long)&kernel);
+ set_fs(fs);
+
+ if (!ret && (cmd == F_GETLK64 || cmd == F_OFD_GETLK)) {
+ user.l_type = kernel.l_type;
+ user.l_whence = kernel.l_whence;
+ user.l_start = kernel.l_start;
+ user.l_len = kernel.l_len;
+ user.l_pid = kernel.l_pid;
+ if (copy_to_user((struct oabi_flock64 __user *)arg,
+ &user, sizeof(user)))
+ ret = -EFAULT;
+ }
+ return ret;
+}
+asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd,
+ unsigned long arg)
+{
switch (cmd) {
case F_OFD_GETLK:
case F_OFD_SETLK:
@@ -209,39 +238,11 @@ asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd,
case F_GETLK64:
case F_SETLK64:
case F_SETLKW64:
- if (copy_from_user(&user, (struct oabi_flock64 __user *)arg,
- sizeof(user)))
- return -EFAULT;
- kernel.l_type = user.l_type;
- kernel.l_whence = user.l_whence;
- kernel.l_start = user.l_start;
- kernel.l_len = user.l_len;
- kernel.l_pid = user.l_pid;
- local_arg = (unsigned long)&kernel;
- fs = get_fs();
- set_fs(KERNEL_DS);
- }
-
- ret = sys_fcntl64(fd, cmd, local_arg);
+ return do_locks(fd, cmd, arg);
- switch (cmd) {
- case F_GETLK64:
- if (!ret) {
- user.l_type = kernel.l_type;
- user.l_whence = kernel.l_whence;
- user.l_start = kernel.l_start;
- user.l_len = kernel.l_len;
- user.l_pid = kernel.l_pid;
- if (copy_to_user((struct oabi_flock64 __user *)arg,
- &user, sizeof(user)))
- ret = -EFAULT;
- }
- case F_SETLK64:
- case F_SETLKW64:
- set_fs(fs);
+ default:
+ return sys_fcntl64(fd, cmd, arg);
}
-
- return ret;
}
struct oabi_epoll_event {
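The refactor above funnels every locking command through one do_locks() helper that converts the packed OABI record, runs sys_fcntl64() under KERNEL_DS, and copies the result back only on success; the old open-coded switch mixed those steps and relied on fall-through. A user-space sketch of the convert/call/convert-back pattern, with invented types:

#include <stdio.h>

struct packed_rec { int a; short b; } __attribute__((packed));
struct native_rec { int a; short b; };

static int op_on_native(struct native_rec *r)
{
        r->a += 1;
        return 0;
}

static int do_op(struct packed_rec *user)
{
        struct native_rec k;
        int ret;

        /* copy in field by field: the layouts differ */
        k.a = user->a;
        k.b = user->b;

        ret = op_on_native(&k);

        if (!ret) {             /* copy back only on success */
                user->a = k.a;
                user->b = k.b;
        }
        return ret;
}

int main(void)
{
        struct packed_rec u = { 41, 7 };

        do_op(&u);
        printf("a=%d b=%d\n", u.a, u.b);
        return 0;
}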
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index eab83b2435b8..e06fd299de08 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -564,17 +564,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
vcpu_sleep(vcpu);
/*
- * Disarming the background timer must be done in a
- * preemptible context, as this call may sleep.
- */
- kvm_timer_flush_hwstate(vcpu);
-
- /*
* Preparing the interrupts to be injected also
* involves poking the GIC, which must be done in a
* non-preemptible context.
*/
preempt_disable();
+ kvm_timer_flush_hwstate(vcpu);
kvm_vgic_flush_hwstate(vcpu);
local_irq_disable();
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 974b1c606d04..3a10c9f1d0a4 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -115,7 +115,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
data);
data = vcpu_data_host_to_guest(vcpu, data, len);
- *vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data;
+ vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
}
return 0;
@@ -186,7 +186,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
rt = vcpu->arch.mmio_decode.rt;
if (is_write) {
- data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len);
+ data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
+ len);
trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
mmio_write_buf(data_buf, len, data);
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 6984342da13d..61d96a645ff3 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -98,6 +98,11 @@ static void kvm_flush_dcache_pud(pud_t pud)
__kvm_flush_dcache_pud(pud);
}
+static bool kvm_is_device_pfn(unsigned long pfn)
+{
+ return !pfn_valid(pfn);
+}
+
/**
* stage2_dissolve_pmd() - clear and flush huge PMD entry
* @kvm: pointer to kvm structure.
@@ -213,7 +218,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
kvm_tlb_flush_vmid_ipa(kvm, addr);
/* No need to invalidate the cache for device mappings */
- if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+ if (!kvm_is_device_pfn(pte_pfn(old_pte)))
kvm_flush_dcache_pte(old_pte);
put_page(virt_to_page(pte));
@@ -305,8 +310,7 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
pte = pte_offset_kernel(pmd, addr);
do {
- if (!pte_none(*pte) &&
- (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+ if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte)))
kvm_flush_dcache_pte(*pte);
} while (pte++, addr += PAGE_SIZE, addr != end);
}
@@ -1037,11 +1041,6 @@ static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
return kvm_vcpu_dabt_iswrite(vcpu);
}
-static bool kvm_is_device_pfn(unsigned long pfn)
-{
- return !pfn_valid(pfn);
-}
-
/**
* stage2_wp_ptes - write protect PMD range
* @pmd: pointer to pmd entry
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 0b556968a6da..a9b3b905e661 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -75,7 +75,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
unsigned long context_id;
phys_addr_t target_pc;
- cpu_id = *vcpu_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK;
+ cpu_id = vcpu_get_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK;
if (vcpu_mode_is_32bit(source_vcpu))
cpu_id &= ~((u32) 0);
@@ -94,8 +94,8 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
return PSCI_RET_INVALID_PARAMS;
}
- target_pc = *vcpu_reg(source_vcpu, 2);
- context_id = *vcpu_reg(source_vcpu, 3);
+ target_pc = vcpu_get_reg(source_vcpu, 2);
+ context_id = vcpu_get_reg(source_vcpu, 3);
kvm_reset_vcpu(vcpu);
@@ -114,7 +114,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
* NOTE: We always update r0 (or x0) because for PSCI v0.1
* the general purpose registers are undefined upon CPU_ON.
*/
- *vcpu_reg(vcpu, 0) = context_id;
+ vcpu_set_reg(vcpu, 0, context_id);
vcpu->arch.power_off = false;
smp_mb(); /* Make sure the above is visible */
@@ -134,8 +134,8 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *tmp;
- target_affinity = *vcpu_reg(vcpu, 1);
- lowest_affinity_level = *vcpu_reg(vcpu, 2);
+ target_affinity = vcpu_get_reg(vcpu, 1);
+ lowest_affinity_level = vcpu_get_reg(vcpu, 2);
/* Determine target affinity mask */
target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
@@ -209,7 +209,7 @@ int kvm_psci_version(struct kvm_vcpu *vcpu)
static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
{
int ret = 1;
- unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
+ unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0);
unsigned long val;
switch (psci_fn) {
@@ -273,13 +273,13 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
break;
}
- *vcpu_reg(vcpu, 0) = val;
+ vcpu_set_reg(vcpu, 0, val);
return ret;
}
static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
{
- unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
+ unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0);
unsigned long val;
switch (psci_fn) {
@@ -295,7 +295,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
break;
}
- *vcpu_reg(vcpu, 0) = val;
+ vcpu_set_reg(vcpu, 0, val);
return 1;
}
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index d72b90905132..588bbc288396 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -88,6 +88,7 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
static unsigned long noinline
__copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
{
+ unsigned long ua_flags;
int atomic;
if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
@@ -118,7 +119,9 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
if (tocopy > n)
tocopy = n;
+ ua_flags = uaccess_save_and_enable();
memcpy((void *)to, from, tocopy);
+ uaccess_restore(ua_flags);
to += tocopy;
from += tocopy;
n -= tocopy;
@@ -145,14 +148,21 @@ arm_copy_to_user(void __user *to, const void *from, unsigned long n)
* With frame pointer disabled, tail call optimization kicks in
* as well making this test almost invisible.
*/
- if (n < 64)
- return __copy_to_user_std(to, from, n);
- return __copy_to_user_memcpy(to, from, n);
+ if (n < 64) {
+ unsigned long ua_flags = uaccess_save_and_enable();
+ n = __copy_to_user_std(to, from, n);
+ uaccess_restore(ua_flags);
+ } else {
+ n = __copy_to_user_memcpy(to, from, n);
+ }
+ return n;
}
static unsigned long noinline
__clear_user_memset(void __user *addr, unsigned long n)
{
+ unsigned long ua_flags;
+
if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
memset((void *)addr, 0, n);
return 0;
@@ -175,7 +185,9 @@ __clear_user_memset(void __user *addr, unsigned long n)
if (tocopy > n)
tocopy = n;
+ ua_flags = uaccess_save_and_enable();
memset((void *)addr, 0, tocopy);
+ uaccess_restore(ua_flags);
addr += tocopy;
n -= tocopy;
@@ -193,9 +205,14 @@ out:
unsigned long arm_clear_user(void __user *addr, unsigned long n)
{
/* See rationale for this in __copy_to_user() above. */
- if (n < 64)
- return __clear_user_std(addr, n);
- return __clear_user_memset(addr, n);
+ if (n < 64) {
+ unsigned long ua_flags = uaccess_save_and_enable();
+ n = __clear_user_std(addr, n);
+ uaccess_restore(ua_flags);
+ } else {
+ n = __clear_user_memset(addr, n);
+ }
+ return n;
}
#if 0
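Both short-copy fast paths above now bracket the library call with uaccess_save_and_enable()/uaccess_restore(), matching the per-chunk bracketing added to the memcpy/memset paths, so the user-access window is never left open. A toy sketch of that discipline, with stand-in functions rather than the kernel API:

#include <stdio.h>

static int uaccess_depth;

static unsigned long uaccess_open(void)
{
        return uaccess_depth++;         /* window opens */
}

static void uaccess_close(unsigned long cookie)
{
        uaccess_depth = cookie;         /* window closes */
}

static void copy_chunk(char *dst, const char *src, unsigned long n)
{
        unsigned long cookie = uaccess_open();

        while (n--)
                *dst++ = *src++;
        uaccess_close(cookie);          /* closed per chunk */
}

int main(void)
{
        char dst[8];

        copy_chunk(dst, "hello", 6);
        printf("%s depth=%d\n", dst, uaccess_depth);
        return 0;
}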
diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig
index 92673006e55c..28656c2b54a0 100644
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -4,7 +4,6 @@ menuconfig ARCH_AT91
select ARCH_REQUIRE_GPIOLIB
select COMMON_CLK_AT91
select PINCTRL
- select PINCTRL_AT91
select SOC_BUS
if ARCH_AT91
@@ -17,6 +16,7 @@ config SOC_SAMA5D2
select HAVE_AT91_USB_CLK
select HAVE_AT91_H32MX
select HAVE_AT91_GENERATED_CLK
+ select PINCTRL_AT91PIO4
help
Select this if you are using one of Atmel's SAMA5D2 family SoC.
@@ -27,6 +27,7 @@ config SOC_SAMA5D3
select HAVE_AT91_UTMI
select HAVE_AT91_SMD
select HAVE_AT91_USB_CLK
+ select PINCTRL_AT91
help
Select this if you are using one of Atmel's SAMA5D3 family SoC.
This support covers SAMA5D31, SAMA5D33, SAMA5D34, SAMA5D35, SAMA5D36.
@@ -40,6 +41,7 @@ config SOC_SAMA5D4
select HAVE_AT91_SMD
select HAVE_AT91_USB_CLK
select HAVE_AT91_H32MX
+ select PINCTRL_AT91
help
Select this if you are using one of Atmel's SAMA5D4 family SoC.
@@ -50,6 +52,7 @@ config SOC_AT91RM9200
select CPU_ARM920T
select HAVE_AT91_USB_CLK
select MIGHT_HAVE_PCI
+ select PINCTRL_AT91
select SOC_SAM_V4_V5
select SRAM if PM
help
@@ -65,6 +68,7 @@ config SOC_AT91SAM9
select HAVE_AT91_UTMI
select HAVE_FB_ATMEL
select MEMORY
+ select PINCTRL_AT91
select SOC_SAM_V4_V5
select SRAM if PM
help
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 80e277cfcc8b..23726fb31741 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -41,8 +41,10 @@
* implementation should be moved down into the pinctrl driver and get
* called as part of the generic suspend/resume path.
*/
+#ifdef CONFIG_PINCTRL_AT91
extern void at91_pinctrl_gpio_suspend(void);
extern void at91_pinctrl_gpio_resume(void);
+#endif
static struct {
unsigned long uhp_udp_mask;
@@ -151,8 +153,9 @@ static void at91_pm_suspend(suspend_state_t state)
static int at91_pm_enter(suspend_state_t state)
{
+#ifdef CONFIG_PINCTRL_AT91
at91_pinctrl_gpio_suspend();
-
+#endif
switch (state) {
/*
* Suspend-to-RAM is like STANDBY plus slow clock mode, so
@@ -192,7 +195,9 @@ static int at91_pm_enter(suspend_state_t state)
error:
target_state = PM_SUSPEND_ON;
+#ifdef CONFIG_PINCTRL_AT91
at91_pinctrl_gpio_resume();
+#endif
return 0;
}
diff --git a/arch/arm/mach-dove/include/mach/entry-macro.S b/arch/arm/mach-dove/include/mach/entry-macro.S
index 72d622baaad3..df1d44bdc375 100644
--- a/arch/arm/mach-dove/include/mach/entry-macro.S
+++ b/arch/arm/mach-dove/include/mach/entry-macro.S
@@ -18,13 +18,13 @@
@ check low interrupts
ldr \irqstat, [\base, #IRQ_CAUSE_LOW_OFF]
ldr \tmp, [\base, #IRQ_MASK_LOW_OFF]
- mov \irqnr, #31
+ mov \irqnr, #32
ands \irqstat, \irqstat, \tmp
@ if no low interrupts set, check high interrupts
ldreq \irqstat, [\base, #IRQ_CAUSE_HIGH_OFF]
ldreq \tmp, [\base, #IRQ_MASK_HIGH_OFF]
- moveq \irqnr, #63
+ moveq \irqnr, #64
andeqs \irqstat, \irqstat, \tmp
@ find first active interrupt source
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 3a10f1a8317a..ff105399aae4 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -27,6 +27,7 @@ menuconfig ARCH_EXYNOS
select SRAM
select THERMAL
select MFD_SYSCON
+ select CLKSRC_EXYNOS_MCT
help
Support for SAMSUNG EXYNOS SoCs (EXYNOS4/5)
diff --git a/arch/arm/mach-exynos/pmu.c b/arch/arm/mach-exynos/pmu.c
index de68938ee6aa..c21e41dad19c 100644
--- a/arch/arm/mach-exynos/pmu.c
+++ b/arch/arm/mach-exynos/pmu.c
@@ -748,8 +748,12 @@ static void exynos5_powerdown_conf(enum sys_powerdown mode)
void exynos_sys_powerdown_conf(enum sys_powerdown mode)
{
unsigned int i;
+ const struct exynos_pmu_data *pmu_data;
+
+ if (!pmu_context)
+ return;
- const struct exynos_pmu_data *pmu_data = pmu_context->pmu_data;
+ pmu_data = pmu_context->pmu_data;
if (pmu_data->powerdown_conf)
pmu_data->powerdown_conf(mode);
diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c
index 8e7976a4c3e7..cfc696b972f3 100644
--- a/arch/arm/mach-imx/gpc.c
+++ b/arch/arm/mach-imx/gpc.c
@@ -177,6 +177,7 @@ static struct irq_chip imx_gpc_chip = {
.irq_unmask = imx_gpc_irq_unmask,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_set_wake = imx_gpc_irq_set_wake,
+ .irq_set_type = irq_chip_set_type_parent,
#ifdef CONFIG_SMP
.irq_set_affinity = irq_chip_set_affinity_parent,
#endif
diff --git a/arch/arm/mach-ixp4xx/include/mach/io.h b/arch/arm/mach-ixp4xx/include/mach/io.h
index b02439019963..7a0c13bf4269 100644
--- a/arch/arm/mach-ixp4xx/include/mach/io.h
+++ b/arch/arm/mach-ixp4xx/include/mach/io.h
@@ -143,7 +143,7 @@ static inline void __indirect_writesl(volatile void __iomem *bus_addr,
writel(*vaddr++, bus_addr);
}
-static inline unsigned char __indirect_readb(const volatile void __iomem *p)
+static inline u8 __indirect_readb(const volatile void __iomem *p)
{
u32 addr = (u32)p;
u32 n, byte_enables, data;
@@ -166,7 +166,7 @@ static inline void __indirect_readsb(const volatile void __iomem *bus_addr,
*vaddr++ = readb(bus_addr);
}
-static inline unsigned short __indirect_readw(const volatile void __iomem *p)
+static inline u16 __indirect_readw(const volatile void __iomem *p)
{
u32 addr = (u32)p;
u32 n, byte_enables, data;
@@ -189,7 +189,7 @@ static inline void __indirect_readsw(const volatile void __iomem *bus_addr,
*vaddr++ = readw(bus_addr);
}
-static inline unsigned long __indirect_readl(const volatile void __iomem *p)
+static inline u32 __indirect_readl(const volatile void __iomem *p)
{
u32 addr = (__force u32)p;
u32 data;
@@ -350,7 +350,7 @@ static inline void insl(u32 io_addr, void *p, u32 count)
((unsigned long)p <= (PIO_MASK + PIO_OFFSET)))
#define ioread8(p) ioread8(p)
-static inline unsigned int ioread8(const void __iomem *addr)
+static inline u8 ioread8(const void __iomem *addr)
{
unsigned long port = (unsigned long __force)addr;
if (__is_io_address(port))
@@ -378,7 +378,7 @@ static inline void ioread8_rep(const void __iomem *addr, void *vaddr, u32 count)
}
#define ioread16(p) ioread16(p)
-static inline unsigned int ioread16(const void __iomem *addr)
+static inline u16 ioread16(const void __iomem *addr)
{
unsigned long port = (unsigned long __force)addr;
if (__is_io_address(port))
@@ -407,7 +407,7 @@ static inline void ioread16_rep(const void __iomem *addr, void *vaddr,
}
#define ioread32(p) ioread32(p)
-static inline unsigned int ioread32(const void __iomem *addr)
+static inline u32 ioread32(const void __iomem *addr)
{
unsigned long port = (unsigned long __force)addr;
if (__is_io_address(port))
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index 5076d3f334d2..0517f0c1581a 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -65,6 +65,8 @@ config SOC_AM43XX
select MACH_OMAP_GENERIC
select MIGHT_HAVE_CACHE_L2X0
select HAVE_ARM_SCU
+ select GENERIC_CLOCKEVENTS_BROADCAST
+ select HAVE_ARM_TWD
config SOC_DRA7XX
bool "TI DRA7XX"
@@ -121,6 +123,7 @@ config ARCH_OMAP2PLUS_TYPICAL
select NEON if CPU_V7
select PM
select REGULATOR
+ select REGULATOR_FIXED_VOLTAGE
select TWL4030_CORE if ARCH_OMAP3 || ARCH_OMAP4
select TWL4030_POWER if ARCH_OMAP3 || ARCH_OMAP4
select VFP
@@ -201,7 +204,6 @@ config MACH_OMAP3_PANDORA
depends on ARCH_OMAP3
default y
select OMAP_PACKAGE_CBB
- select REGULATOR_FIXED_VOLTAGE if REGULATOR
config MACH_NOKIA_N810
bool
diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c
index 17a6f752a436..7b76ce01c21d 100644
--- a/arch/arm/mach-omap2/gpmc-onenand.c
+++ b/arch/arm/mach-omap2/gpmc-onenand.c
@@ -149,8 +149,8 @@ static int omap2_onenand_get_freq(struct omap_onenand_platform_data *cfg,
freq = 104;
break;
default:
- freq = 54;
- break;
+ pr_err("onenand rate not detected, bad GPMC async timings?\n");
+ freq = 0;
}
return freq;
@@ -271,6 +271,11 @@ static int omap2_onenand_setup_async(void __iomem *onenand_base)
struct gpmc_timings t;
int ret;
+ /*
+ * Note that we need to keep sync_write set so that the call to
+ * omap2_onenand_set_async_mode() can detect the onenand
+ * supported clock rate for the sync timings.
+ */
if (gpmc_onenand_data->of_node) {
gpmc_read_settings_dt(gpmc_onenand_data->of_node,
&onenand_async);
@@ -281,12 +286,9 @@ static int omap2_onenand_setup_async(void __iomem *onenand_base)
else
gpmc_onenand_data->flags |= ONENAND_SYNC_READ;
onenand_async.sync_read = false;
- onenand_async.sync_write = false;
}
}
- omap2_onenand_set_async_mode(onenand_base);
-
omap2_onenand_calc_async_timings(&t);
ret = gpmc_cs_program_settings(gpmc_onenand_data->cs, &onenand_async);
@@ -310,6 +312,8 @@ static int omap2_onenand_setup_sync(void __iomem *onenand_base, int *freq_ptr)
if (!freq) {
/* Very first call freq is not known */
freq = omap2_onenand_get_freq(gpmc_onenand_data, onenand_base);
+ if (!freq)
+ return -ENODEV;
set_onenand_cfg(onenand_base);
}
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index 5305ec7341ec..79e1f876d1c9 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -143,9 +143,9 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
* Ensure that CPU power state is set to ON to avoid CPU
* powerdomain transition on wfi
*/
- clkdm_wakeup(cpu1_clkdm);
- omap_set_pwrdm_state(cpu1_pwrdm, PWRDM_POWER_ON);
- clkdm_allow_idle(cpu1_clkdm);
+ clkdm_wakeup_nolock(cpu1_clkdm);
+ pwrdm_set_next_pwrst(cpu1_pwrdm, PWRDM_POWER_ON);
+ clkdm_allow_idle_nolock(cpu1_clkdm);
if (IS_PM44XX_ERRATUM(PM_OMAP4_ROM_SMP_BOOT_ERRATUM_GICD)) {
while (gic_dist_disabled()) {
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index cc8a987149e2..48495ad82aba 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -890,6 +890,36 @@ static int _init_opt_clks(struct omap_hwmod *oh)
return ret;
}
+static void _enable_optional_clocks(struct omap_hwmod *oh)
+{
+ struct omap_hwmod_opt_clk *oc;
+ int i;
+
+ pr_debug("omap_hwmod: %s: enabling optional clocks\n", oh->name);
+
+ for (i = oh->opt_clks_cnt, oc = oh->opt_clks; i > 0; i--, oc++)
+ if (oc->_clk) {
+ pr_debug("omap_hwmod: enable %s:%s\n", oc->role,
+ __clk_get_name(oc->_clk));
+ clk_enable(oc->_clk);
+ }
+}
+
+static void _disable_optional_clocks(struct omap_hwmod *oh)
+{
+ struct omap_hwmod_opt_clk *oc;
+ int i;
+
+ pr_debug("omap_hwmod: %s: disabling optional clocks\n", oh->name);
+
+ for (i = oh->opt_clks_cnt, oc = oh->opt_clks; i > 0; i--, oc++)
+ if (oc->_clk) {
+ pr_debug("omap_hwmod: disable %s:%s\n", oc->role,
+ __clk_get_name(oc->_clk));
+ clk_disable(oc->_clk);
+ }
+}
+
/**
* _enable_clocks - enable hwmod main clock and interface clocks
* @oh: struct omap_hwmod *
@@ -917,6 +947,9 @@ static int _enable_clocks(struct omap_hwmod *oh)
clk_enable(os->_clk);
}
+ if (oh->flags & HWMOD_OPT_CLKS_NEEDED)
+ _enable_optional_clocks(oh);
+
/* The opt clocks are controlled by the device driver. */
return 0;
@@ -948,41 +981,14 @@ static int _disable_clocks(struct omap_hwmod *oh)
clk_disable(os->_clk);
}
+ if (oh->flags & HWMOD_OPT_CLKS_NEEDED)
+ _disable_optional_clocks(oh);
+
/* The opt clocks are controlled by the device driver. */
return 0;
}
-static void _enable_optional_clocks(struct omap_hwmod *oh)
-{
- struct omap_hwmod_opt_clk *oc;
- int i;
-
- pr_debug("omap_hwmod: %s: enabling optional clocks\n", oh->name);
-
- for (i = oh->opt_clks_cnt, oc = oh->opt_clks; i > 0; i--, oc++)
- if (oc->_clk) {
- pr_debug("omap_hwmod: enable %s:%s\n", oc->role,
- __clk_get_name(oc->_clk));
- clk_enable(oc->_clk);
- }
-}
-
-static void _disable_optional_clocks(struct omap_hwmod *oh)
-{
- struct omap_hwmod_opt_clk *oc;
- int i;
-
- pr_debug("omap_hwmod: %s: disabling optional clocks\n", oh->name);
-
- for (i = oh->opt_clks_cnt, oc = oh->opt_clks; i > 0; i--, oc++)
- if (oc->_clk) {
- pr_debug("omap_hwmod: disable %s:%s\n", oc->role,
- __clk_get_name(oc->_clk));
- clk_disable(oc->_clk);
- }
-}
-
/**
* _omap4_enable_module - enable CLKCTRL modulemode on OMAP4
* @oh: struct omap_hwmod *
diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h
index ca6df1a73475..76bce11c85a4 100644
--- a/arch/arm/mach-omap2/omap_hwmod.h
+++ b/arch/arm/mach-omap2/omap_hwmod.h
@@ -523,6 +523,8 @@ struct omap_hwmod_omap4_prcm {
* HWMOD_RECONFIG_IO_CHAIN: omap_hwmod code needs to reconfigure wake-up
* events by calling _reconfigure_io_chain() when a device is enabled
* or idled.
+ * HWMOD_OPT_CLKS_NEEDED: The optional clocks are needed for the module to
+ * operate and they need to be handled at the same time as the main_clk.
*/
#define HWMOD_SWSUP_SIDLE (1 << 0)
#define HWMOD_SWSUP_MSTANDBY (1 << 1)
@@ -538,6 +540,7 @@ struct omap_hwmod_omap4_prcm {
#define HWMOD_FORCE_MSTANDBY (1 << 11)
#define HWMOD_SWSUP_SIDLE_ACT (1 << 12)
#define HWMOD_RECONFIG_IO_CHAIN (1 << 13)
+#define HWMOD_OPT_CLKS_NEEDED (1 << 14)
/*
* omap_hwmod._int_flags definitions
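The new HWMOD_OPT_CLKS_NEEDED bit lets a module (mcasp3 in the next file) ask for its optional clocks to be toggled together with the main clock instead of by the driver. A trimmed sketch of flag-gated clock handling, illustrative only:

#include <stdio.h>

#define OPT_CLKS_NEEDED (1u << 14)

struct module {
        const char *name;
        unsigned int flags;
};

static void enable_clocks(struct module *m)
{
        printf("%s: main clock on\n", m->name);
        if (m->flags & OPT_CLKS_NEEDED) /* gate on the flag bit */
                printf("%s: optional clocks on\n", m->name);
}

int main(void)
{
        struct module mcasp3 = { "mcasp3", OPT_CLKS_NEEDED };
        struct module mmc1 = { "mmc1", 0 };

        enable_clocks(&mcasp3);
        enable_clocks(&mmc1);
        return 0;
}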
diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
index 51d1ecb384bd..ee4e04434a94 100644
--- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
@@ -1298,6 +1298,44 @@ static struct omap_hwmod dra7xx_mcspi4_hwmod = {
};
/*
+ * 'mcasp' class
+ *
+ */
+static struct omap_hwmod_class_sysconfig dra7xx_mcasp_sysc = {
+ .sysc_offs = 0x0004,
+ .sysc_flags = SYSC_HAS_SIDLEMODE,
+ .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
+ .sysc_fields = &omap_hwmod_sysc_type3,
+};
+
+static struct omap_hwmod_class dra7xx_mcasp_hwmod_class = {
+ .name = "mcasp",
+ .sysc = &dra7xx_mcasp_sysc,
+};
+
+/* mcasp3 */
+static struct omap_hwmod_opt_clk mcasp3_opt_clks[] = {
+ { .role = "ahclkx", .clk = "mcasp3_ahclkx_mux" },
+};
+
+static struct omap_hwmod dra7xx_mcasp3_hwmod = {
+ .name = "mcasp3",
+ .class = &dra7xx_mcasp_hwmod_class,
+ .clkdm_name = "l4per2_clkdm",
+ .main_clk = "mcasp3_aux_gfclk_mux",
+ .flags = HWMOD_OPT_CLKS_NEEDED,
+ .prcm = {
+ .omap4 = {
+ .clkctrl_offs = DRA7XX_CM_L4PER2_MCASP3_CLKCTRL_OFFSET,
+ .context_offs = DRA7XX_RM_L4PER2_MCASP3_CONTEXT_OFFSET,
+ .modulemode = MODULEMODE_SWCTRL,
+ },
+ },
+ .opt_clks = mcasp3_opt_clks,
+ .opt_clks_cnt = ARRAY_SIZE(mcasp3_opt_clks),
+};
+
+/*
* 'mmc' class
*
*/
@@ -2566,6 +2604,22 @@ static struct omap_hwmod_ocp_if dra7xx_l3_main_1__hdmi = {
.user = OCP_USER_MPU | OCP_USER_SDMA,
};
+/* l4_per2 -> mcasp3 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per2__mcasp3 = {
+ .master = &dra7xx_l4_per2_hwmod,
+ .slave = &dra7xx_mcasp3_hwmod,
+ .clk = "l4_root_clk_div",
+ .user = OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l3_main_1 -> mcasp3 */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__mcasp3 = {
+ .master = &dra7xx_l3_main_1_hwmod,
+ .slave = &dra7xx_mcasp3_hwmod,
+ .clk = "l3_iclk_div",
+ .user = OCP_USER_MPU | OCP_USER_SDMA,
+};
+
/* l4_per1 -> elm */
static struct omap_hwmod_ocp_if dra7xx_l4_per1__elm = {
.master = &dra7xx_l4_per1_hwmod,
@@ -3308,6 +3362,8 @@ static struct omap_hwmod_ocp_if *dra7xx_hwmod_ocp_ifs[] __initdata = {
&dra7xx_l4_wkup__dcan1,
&dra7xx_l4_per2__dcan2,
&dra7xx_l4_per2__cpgmac0,
+ &dra7xx_l4_per2__mcasp3,
+ &dra7xx_l3_main_1__mcasp3,
&dra7xx_gmac__mdio,
&dra7xx_l4_cfg__dma_system,
&dra7xx_l3_main_1__dss,
diff --git a/arch/arm/mach-omap2/omap_hwmod_81xx_data.c b/arch/arm/mach-omap2/omap_hwmod_81xx_data.c
index b1288f56d509..6256052893ec 100644
--- a/arch/arm/mach-omap2/omap_hwmod_81xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_81xx_data.c
@@ -144,6 +144,7 @@ static struct omap_hwmod dm81xx_l4_ls_hwmod = {
.name = "l4_ls",
.clkdm_name = "alwon_l3s_clkdm",
.class = &l4_hwmod_class,
+ .flags = HWMOD_NO_IDLEST,
};
/*
@@ -155,6 +156,7 @@ static struct omap_hwmod dm81xx_l4_hs_hwmod = {
.name = "l4_hs",
.clkdm_name = "alwon_l3_med_clkdm",
.class = &l4_hwmod_class,
+ .flags = HWMOD_NO_IDLEST,
};
/* L3 slow -> L4 ls peripheral interface running at 125MHz */
@@ -850,6 +852,7 @@ static struct omap_hwmod dm816x_emac0_hwmod = {
.name = "emac0",
.clkdm_name = "alwon_ethernet_clkdm",
.class = &dm816x_emac_hwmod_class,
+ .flags = HWMOD_NO_IDLEST,
};
static struct omap_hwmod_ocp_if dm81xx_l4_hs__emac0 = {
diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
index 1dfe34654c43..58144779dec4 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -24,9 +24,6 @@
#include <linux/platform_data/iommu-omap.h>
#include <linux/platform_data/wkup_m3.h>
-#include <asm/siginfo.h>
-#include <asm/signal.h>
-
#include "common.h"
#include "common-board-devices.h"
#include "dss-common.h"
@@ -385,29 +382,6 @@ static void __init omap3_pandora_legacy_init(void)
}
#endif /* CONFIG_ARCH_OMAP3 */
-#ifdef CONFIG_SOC_TI81XX
-static int fault_fixed_up;
-
-static int t410_abort_handler(unsigned long addr, unsigned int fsr,
- struct pt_regs *regs)
-{
- if ((fsr == 0x406 || fsr == 0xc06) && !fault_fixed_up) {
- pr_warn("External imprecise Data abort at addr=%#lx, fsr=%#x ignored.\n",
- addr, fsr);
- fault_fixed_up = 1;
- return 0;
- }
-
- return 1;
-}
-
-static void __init t410_abort_init(void)
-{
- hook_fault_code(16 + 6, t410_abort_handler, SIGBUS, BUS_OBJERR,
- "imprecise external abort");
-}
-#endif
-
#if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5)
static struct iommu_platform_data omap4_iommu_pdata = {
.reset_name = "mmu_cache",
@@ -536,9 +510,6 @@ static struct pdata_init pdata_quirks[] __initdata = {
{ "openpandora,omap3-pandora-600mhz", omap3_pandora_legacy_init, },
{ "openpandora,omap3-pandora-1ghz", omap3_pandora_legacy_init, },
#endif
-#ifdef CONFIG_SOC_TI81XX
- { "hp,t410", t410_abort_init, },
-#endif
#ifdef CONFIG_SOC_OMAP5
{ "ti,omap5-uevm", omap5_uevm_legacy_init, },
#endif
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 87b98bf92366..2dbd3785ee6f 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -301,11 +301,11 @@ static void omap3_pm_idle(void)
if (omap_irq_pending())
return;
- trace_cpu_idle(1, smp_processor_id());
+ trace_cpu_idle_rcuidle(1, smp_processor_id());
omap_sram_idle();
- trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#ifdef CONFIG_SUSPEND
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index b18ebbefae09..f86692dbcfd5 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -320,6 +320,12 @@ static int __init omap_dm_timer_init_one(struct omap_dm_timer *timer,
return r;
}
+#if !defined(CONFIG_SMP) && defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)
+void tick_broadcast(const struct cpumask *mask)
+{
+}
+#endif
+
static void __init omap2_gp_clockevent_init(int gptimer_id,
const char *fck_source,
const char *property)
diff --git a/arch/arm/mach-orion5x/include/mach/entry-macro.S b/arch/arm/mach-orion5x/include/mach/entry-macro.S
index 79eb502a1e64..73919a36b577 100644
--- a/arch/arm/mach-orion5x/include/mach/entry-macro.S
+++ b/arch/arm/mach-orion5x/include/mach/entry-macro.S
@@ -21,5 +21,5 @@
@ find cause bits that are unmasked
ands \irqstat, \irqstat, \tmp @ clear Z flag if any
clzne \irqnr, \irqstat @ calc irqnr
- rsbne \irqnr, \irqnr, #31
+ rsbne \irqnr, \irqnr, #32
.endm
diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c
index 9a9c15bfcd34..7c0d5618be5e 100644
--- a/arch/arm/mach-pxa/ezx.c
+++ b/arch/arm/mach-pxa/ezx.c
@@ -889,6 +889,7 @@ static void __init e680_init(void)
pxa_set_keypad_info(&e680_keypad_platform_data);
+ pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup));
platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
platform_add_devices(ARRAY_AND_SIZE(e680_devices));
}
@@ -956,6 +957,7 @@ static void __init a1200_init(void)
pxa_set_keypad_info(&a1200_keypad_platform_data);
+ pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup));
platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
platform_add_devices(ARRAY_AND_SIZE(a1200_devices));
}
@@ -1148,6 +1150,7 @@ static void __init a910_init(void)
platform_device_register(&a910_camera);
}
+ pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup));
platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
platform_add_devices(ARRAY_AND_SIZE(a910_devices));
}
@@ -1215,6 +1218,7 @@ static void __init e6_init(void)
pxa_set_keypad_info(&e6_keypad_platform_data);
+ pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup));
platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
platform_add_devices(ARRAY_AND_SIZE(e6_devices));
}
@@ -1256,6 +1260,7 @@ static void __init e2_init(void)
pxa_set_keypad_info(&e2_keypad_platform_data);
+ pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup));
platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
platform_add_devices(ARRAY_AND_SIZE(e2_devices));
}
diff --git a/arch/arm/mach-pxa/palm27x.c b/arch/arm/mach-pxa/palm27x.c
index 13eba2b26e0a..8fbfb10047ec 100644
--- a/arch/arm/mach-pxa/palm27x.c
+++ b/arch/arm/mach-pxa/palm27x.c
@@ -344,7 +344,7 @@ void __init palm27x_pwm_init(int bl, int lcd)
{
palm_bl_power = bl;
palm_lcd_power = lcd;
- pwm_add_lookup(palm27x_pwm_lookup, ARRAY_SIZE(palm27x_pwm_lookup));
+ pwm_add_table(palm27x_pwm_lookup, ARRAY_SIZE(palm27x_pwm_lookup));
platform_device_register(&palm27x_backlight);
}
#endif
diff --git a/arch/arm/mach-pxa/palmtc.c b/arch/arm/mach-pxa/palmtc.c
index aebf6de62468..0b5c3876720c 100644
--- a/arch/arm/mach-pxa/palmtc.c
+++ b/arch/arm/mach-pxa/palmtc.c
@@ -169,7 +169,7 @@ static inline void palmtc_keys_init(void) {}
#if defined(CONFIG_BACKLIGHT_PWM) || defined(CONFIG_BACKLIGHT_PWM_MODULE)
static struct pwm_lookup palmtc_pwm_lookup[] = {
PWM_LOOKUP("pxa25x-pwm.1", 0, "pwm-backlight.0", NULL, PALMTC_PERIOD_NS,
- PWM_PERIOD_NORMAL),
+ PWM_POLARITY_NORMAL),
};
static struct platform_pwm_backlight_data palmtc_backlight_data = {
diff --git a/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c b/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c
index a19460e6e7b0..b355fca6cc2e 100644
--- a/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c
+++ b/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c
@@ -20,7 +20,7 @@
#include <plat/cpu.h>
#include <plat/cpu-freq-core.h>
-static struct cpufreq_frequency_table s3c2440_plls_12[] __initdata = {
+static struct cpufreq_frequency_table s3c2440_plls_12[] = {
{ .frequency = 75000000, .driver_data = PLLVAL(0x75, 3, 3), }, /* FVco 600.000000 */
{ .frequency = 80000000, .driver_data = PLLVAL(0x98, 4, 3), }, /* FVco 640.000000 */
{ .frequency = 90000000, .driver_data = PLLVAL(0x70, 2, 3), }, /* FVco 720.000000 */
diff --git a/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c b/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c
index 1191b2905625..be9a248b5ce9 100644
--- a/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c
+++ b/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c
@@ -20,7 +20,7 @@
#include <plat/cpu.h>
#include <plat/cpu-freq-core.h>
-static struct cpufreq_frequency_table s3c2440_plls_169344[] __initdata = {
+static struct cpufreq_frequency_table s3c2440_plls_169344[] = {
{ .frequency = 78019200, .driver_data = PLLVAL(121, 5, 3), }, /* FVco 624.153600 */
{ .frequency = 84067200, .driver_data = PLLVAL(131, 5, 3), }, /* FVco 672.537600 */
{ .frequency = 90115200, .driver_data = PLLVAL(141, 5, 3), }, /* FVco 720.921600 */
diff --git a/arch/arm/mach-shmobile/setup-r8a7793.c b/arch/arm/mach-shmobile/setup-r8a7793.c
index 1d2825cb7a65..5fce87f7f254 100644
--- a/arch/arm/mach-shmobile/setup-r8a7793.c
+++ b/arch/arm/mach-shmobile/setup-r8a7793.c
@@ -19,7 +19,7 @@
#include "common.h"
#include "rcar-gen2.h"
-static const char *r8a7793_boards_compat_dt[] __initconst = {
+static const char * const r8a7793_boards_compat_dt[] __initconst = {
"renesas,r8a7793",
NULL,
};
diff --git a/arch/arm/mach-sti/Kconfig b/arch/arm/mach-sti/Kconfig
index 125865daaf17..12dd1dc0a041 100644
--- a/arch/arm/mach-sti/Kconfig
+++ b/arch/arm/mach-sti/Kconfig
@@ -3,6 +3,7 @@ menuconfig ARCH_STI
select ARM_GIC
select ST_IRQCHIP
select ARM_GLOBAL_TIMER
+ select CLKSRC_ST_LPC
select PINCTRL
select PINCTRL_ST
select MFD_SYSCON
diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig
index c9ac19b24e5a..5eacdd61e61c 100644
--- a/arch/arm/mach-ux500/Kconfig
+++ b/arch/arm/mach-ux500/Kconfig
@@ -32,6 +32,7 @@ config UX500_SOC_DB8500
select PINCTRL_AB8540
select REGULATOR
select REGULATOR_DB8500_PRCMU
+ select CLKSRC_DBX500_PRCMU
select PM_GENERIC_DOMAINS if PM
config MACH_MOP500
diff --git a/arch/arm/mach-zx/Kconfig b/arch/arm/mach-zx/Kconfig
index 7fdc5bf24f9b..446334a25cf5 100644
--- a/arch/arm/mach-zx/Kconfig
+++ b/arch/arm/mach-zx/Kconfig
@@ -13,7 +13,7 @@ config SOC_ZX296702
select ARM_GLOBAL_TIMER
select HAVE_ARM_SCU if SMP
select HAVE_ARM_TWD if SMP
- select PM_GENERIC_DOMAINS
+ select PM_GENERIC_DOMAINS if PM
help
Support for the ZTE ZX296702 SoC, which is a dual-core Cortex-A9 MP
endif
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 845769e41332..c8c8b9ed02e0 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -165,13 +165,28 @@ static void flush_context(unsigned int cpu)
__flush_icache_all();
}
-static int is_reserved_asid(u64 asid)
+static bool check_update_reserved_asid(u64 asid, u64 newasid)
{
int cpu;
- for_each_possible_cpu(cpu)
- if (per_cpu(reserved_asids, cpu) == asid)
- return 1;
- return 0;
+ bool hit = false;
+
+ /*
+ * Iterate over the set of reserved ASIDs looking for a match.
+ * If we find one, then we can update our mm to use newasid
+ * (i.e. the same ASID in the current generation) but we can't
+ * exit the loop early, since we need to ensure that all copies
+ * of the old ASID are updated to reflect the mm. Failure to do
+ * so could result in us missing the reserved ASID in a future
+ * generation.
+ */
+ for_each_possible_cpu(cpu) {
+ if (per_cpu(reserved_asids, cpu) == asid) {
+ hit = true;
+ per_cpu(reserved_asids, cpu) = newasid;
+ }
+ }
+
+ return hit;
}
static u64 new_context(struct mm_struct *mm, unsigned int cpu)
@@ -181,12 +196,14 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
u64 generation = atomic64_read(&asid_generation);
if (asid != 0) {
+ u64 newasid = generation | (asid & ~ASID_MASK);
+
/*
* If our current ASID was active during a rollover, we
* can continue to use it and this was just a false alarm.
*/
- if (is_reserved_asid(asid))
- return generation | (asid & ~ASID_MASK);
+ if (check_update_reserved_asid(asid, newasid))
+ return newasid;
/*
* We had a valid ASID in a previous life, so try to re-use
@@ -194,7 +211,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
*/
asid &= ~ASID_MASK;
if (!__test_and_set_bit(asid, asid_map))
- goto bump_gen;
+ return newasid;
}
/*
@@ -216,11 +233,8 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
__set_bit(asid, asid_map);
cur_idx = asid;
-
-bump_gen:
- asid |= generation;
cpumask_clear(mm_cpumask(mm));
- return asid;
+ return asid | generation;
}
void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
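As the comment in check_update_reserved_asid() explains, the scan must not stop at the first hit: every per-CPU copy of the old ASID has to be rewritten to the new generation, or a later rollover could miss the reservation. A self-contained sketch of that scan-and-update loop, with a plain array standing in for per_cpu:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

static uint64_t reserved_asids[NR_CPUS];

static bool check_update_reserved(uint64_t asid, uint64_t newasid)
{
        bool hit = false;
        int cpu;

        /* keep scanning after a hit: all stale copies get rewritten */
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                if (reserved_asids[cpu] == asid) {
                        hit = true;
                        reserved_asids[cpu] = newasid;
                }
        }
        return hit;
}

int main(void)
{
        reserved_asids[1] = reserved_asids[3] = 0x10005;

        printf("hit=%d cpu1=%#lx cpu3=%#lx\n",
               check_update_reserved(0x10005, 0x20005),
               (unsigned long)reserved_asids[1],
               (unsigned long)reserved_asids[3]);
        return 0;
}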
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index e62400e5fb99..534a60ae282e 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1521,7 +1521,7 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
return -ENOMEM;
for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) {
- phys_addr_t phys = sg_phys(s) & PAGE_MASK;
+ phys_addr_t phys = page_to_phys(sg_page(s));
unsigned int len = PAGE_ALIGN(s->offset + s->length);
if (!is_coherent &&
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 8a63b4cdc0f2..7f8cd1b3557f 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -22,6 +22,7 @@
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <linux/sizes.h>
+#include <linux/stop_machine.h>
#include <asm/cp15.h>
#include <asm/mach-types.h>
@@ -627,12 +628,10 @@ static struct section_perm ro_perms[] = {
* safe to be called with preemption disabled, as under stop_machine().
*/
static inline void section_update(unsigned long addr, pmdval_t mask,
- pmdval_t prot)
+ pmdval_t prot, struct mm_struct *mm)
{
- struct mm_struct *mm;
pmd_t *pmd;
- mm = current->active_mm;
pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
#ifdef CONFIG_ARM_LPAE
@@ -656,49 +655,82 @@ static inline bool arch_has_strict_perms(void)
return !!(get_cr() & CR_XP);
}
-#define set_section_perms(perms, field) { \
- size_t i; \
- unsigned long addr; \
- \
- if (!arch_has_strict_perms()) \
- return; \
- \
- for (i = 0; i < ARRAY_SIZE(perms); i++) { \
- if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) || \
- !IS_ALIGNED(perms[i].end, SECTION_SIZE)) { \
- pr_err("BUG: section %lx-%lx not aligned to %lx\n", \
- perms[i].start, perms[i].end, \
- SECTION_SIZE); \
- continue; \
- } \
- \
- for (addr = perms[i].start; \
- addr < perms[i].end; \
- addr += SECTION_SIZE) \
- section_update(addr, perms[i].mask, \
- perms[i].field); \
- } \
+void set_section_perms(struct section_perm *perms, int n, bool set,
+ struct mm_struct *mm)
+{
+ size_t i;
+ unsigned long addr;
+
+ if (!arch_has_strict_perms())
+ return;
+
+ for (i = 0; i < n; i++) {
+ if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) ||
+ !IS_ALIGNED(perms[i].end, SECTION_SIZE)) {
+ pr_err("BUG: section %lx-%lx not aligned to %lx\n",
+ perms[i].start, perms[i].end,
+ SECTION_SIZE);
+ continue;
+ }
+
+ for (addr = perms[i].start;
+ addr < perms[i].end;
+ addr += SECTION_SIZE)
+ section_update(addr, perms[i].mask,
+ set ? perms[i].prot : perms[i].clear, mm);
+ }
+
}
-static inline void fix_kernmem_perms(void)
+static void update_sections_early(struct section_perm perms[], int n)
{
- set_section_perms(nx_perms, prot);
+ struct task_struct *t, *s;
+
+ read_lock(&tasklist_lock);
+ for_each_process(t) {
+ if (t->flags & PF_KTHREAD)
+ continue;
+ for_each_thread(t, s)
+ set_section_perms(perms, n, true, s->mm);
+ }
+ read_unlock(&tasklist_lock);
+ set_section_perms(perms, n, true, current->active_mm);
+ set_section_perms(perms, n, true, &init_mm);
+}
+
+int __fix_kernmem_perms(void *unused)
+{
+ update_sections_early(nx_perms, ARRAY_SIZE(nx_perms));
+ return 0;
+}
+
+void fix_kernmem_perms(void)
+{
+ stop_machine(__fix_kernmem_perms, NULL, NULL);
}
#ifdef CONFIG_DEBUG_RODATA
+int __mark_rodata_ro(void *unused)
+{
+ update_sections_early(ro_perms, ARRAY_SIZE(ro_perms));
+ return 0;
+}
+
void mark_rodata_ro(void)
{
- set_section_perms(ro_perms, prot);
+ stop_machine(__mark_rodata_ro, NULL, NULL);
}
void set_kernel_text_rw(void)
{
- set_section_perms(ro_perms, clear);
+ set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), false,
+ current->active_mm);
}
void set_kernel_text_ro(void)
{
- set_section_perms(ro_perms, prot);
+ set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), true,
+ current->active_mm);
}
#endif /* CONFIG_DEBUG_RODATA */
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index de2b246fed38..8e1ea433c3f1 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -95,7 +95,7 @@ ENDPROC(cpu_v7_dcache_clean_area)
.equ cpu_v7_suspend_size, 4 * 9
#ifdef CONFIG_ARM_CPU_SUSPEND
ENTRY(cpu_v7_do_suspend)
- stmfd sp!, {r4 - r10, lr}
+ stmfd sp!, {r4 - r11, lr}
mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID
mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID
stmia r0!, {r4 - r5}
@@ -112,7 +112,7 @@ ENTRY(cpu_v7_do_suspend)
mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register
mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control
stmia r0, {r5 - r11}
- ldmfd sp!, {r4 - r10, pc}
+ ldmfd sp!, {r4 - r11, pc}
ENDPROC(cpu_v7_do_suspend)
ENTRY(cpu_v7_do_resume)
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 2f4b14cfddb4..93d0b6d0b63e 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -187,19 +187,6 @@ static inline int mem_words_used(struct jit_ctx *ctx)
return fls(ctx->seen & SEEN_MEM);
}
-static inline bool is_load_to_a(u16 inst)
-{
- switch (inst) {
- case BPF_LD | BPF_W | BPF_LEN:
- case BPF_LD | BPF_W | BPF_ABS:
- case BPF_LD | BPF_H | BPF_ABS:
- case BPF_LD | BPF_B | BPF_ABS:
- return true;
- default:
- return false;
- }
-}
-
static void jit_fill_hole(void *area, unsigned int size)
{
u32 *ptr;
@@ -211,7 +198,6 @@ static void jit_fill_hole(void *area, unsigned int size)
static void build_prologue(struct jit_ctx *ctx)
{
u16 reg_set = saved_regs(ctx);
- u16 first_inst = ctx->skf->insns[0].code;
u16 off;
#ifdef CONFIG_FRAME_POINTER
@@ -241,7 +227,7 @@ static void build_prologue(struct jit_ctx *ctx)
emit(ARM_MOV_I(r_X, 0), ctx);
/* do not leak kernel data to userspace */
- if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
+ if (bpf_needs_clear_a(&ctx->skf->insns[0]))
emit(ARM_MOV_I(r_A, 0), ctx);
/* stack space for the BPF_MEM words */
@@ -770,7 +756,8 @@ load_ind:
case BPF_ALU | BPF_RSH | BPF_K:
if (unlikely(k > 31))
return -1;
- emit(ARM_LSR_I(r_A, r_A, k), ctx);
+ if (k)
+ emit(ARM_LSR_I(r_A, r_A, k), ctx);
break;
case BPF_ALU | BPF_RSH | BPF_X:
update_on_xread(ctx);
@@ -1061,7 +1048,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
}
build_epilogue(&ctx);
- flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx));
+ flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx));
#if __LINUX_ARM_ARCH__ < 7
if (ctx.imm_count)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9ac16a482ff1..871f21783866 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -49,7 +49,7 @@ config ARM64
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_BITREVERSE
select HAVE_ARCH_JUMP_LABEL
- select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP
+ select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
select HAVE_ARCH_KGDB
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
@@ -316,6 +316,27 @@ config ARM64_ERRATUM_832075
If unsure, say Y.
+config ARM64_ERRATUM_834220
+ bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault"
+ depends on KVM
+ default y
+ help
+ This option adds an alternative code sequence to work around ARM
+ erratum 834220 on Cortex-A57 parts up to r1p2.
+
+ Affected Cortex-A57 parts might report a Stage 2 translation
+ fault as the result of a Stage 1 fault for a load crossing a
+ page boundary when there is a permission or device memory
+ alignment fault at Stage 1 and a translation fault at Stage 2.
+
+ The workaround is to verify that the Stage 1 translation
+ doesn't generate a fault before handling the Stage 2 fault.
+ Please note that this does not necessarily enable the workaround,
+ as it depends on the alternative framework, which will only patch
+ the kernel if an affected CPU is detected.
+
+ If unsure, say Y.
+
config ARM64_ERRATUM_845719
bool "Cortex-A53: 845719: a load might read incorrect data"
depends on COMPAT
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
index e81cd48d6245..925552e7b4f3 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
@@ -269,6 +269,7 @@
clock-frequency = <0>; /* Updated by bootloader */
voltage-ranges = <1800 1800 3300 3300>;
sdhci,auto-cmd12;
+ little-endian;
bus-width = <4>;
};
@@ -277,6 +278,7 @@
reg = <0x0 0x2300000 0x0 0x10000>;
interrupts = <0 36 0x4>; /* Level high type */
gpio-controller;
+ little-endian;
#gpio-cells = <2>;
interrupt-controller;
#interrupt-cells = <2>;
@@ -287,6 +289,7 @@
reg = <0x0 0x2310000 0x0 0x10000>;
interrupts = <0 36 0x4>; /* Level high type */
gpio-controller;
+ little-endian;
#gpio-cells = <2>;
interrupt-controller;
#interrupt-cells = <2>;
@@ -297,6 +300,7 @@
reg = <0x0 0x2320000 0x0 0x10000>;
interrupts = <0 37 0x4>; /* Level high type */
gpio-controller;
+ little-endian;
#gpio-cells = <2>;
interrupt-controller;
#interrupt-cells = <2>;
@@ -307,6 +311,7 @@
reg = <0x0 0x2330000 0x0 0x10000>;
interrupts = <0 37 0x4>; /* Level high type */
gpio-controller;
+ little-endian;
#gpio-cells = <2>;
interrupt-controller;
#interrupt-cells = <2>;
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
index ce47792a983d..f7bd9bf0bbb3 100644
--- a/arch/arm64/crypto/aes-ce-cipher.c
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -237,7 +237,7 @@ EXPORT_SYMBOL(ce_aes_setkey);
static struct crypto_alg aes_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-ce",
- .cra_priority = 300,
+ .cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 030cdcb46c6b..2731d3b25ed2 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -77,6 +77,7 @@
#ifndef __ASSEMBLY__
#include <linux/stringify.h>
+#include <asm/barrier.h>
/*
* Low-level accessors
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 624f9679f4b0..9622eb48f894 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -64,27 +64,31 @@ do { \
#define smp_load_acquire(p) \
({ \
- typeof(*p) ___p1; \
+ union { typeof(*p) __val; char __c[1]; } __u; \
compiletime_assert_atomic_type(*p); \
switch (sizeof(*p)) { \
case 1: \
asm volatile ("ldarb %w0, %1" \
- : "=r" (___p1) : "Q" (*p) : "memory"); \
+ : "=r" (*(__u8 *)__u.__c) \
+ : "Q" (*p) : "memory"); \
break; \
case 2: \
asm volatile ("ldarh %w0, %1" \
- : "=r" (___p1) : "Q" (*p) : "memory"); \
+ : "=r" (*(__u16 *)__u.__c) \
+ : "Q" (*p) : "memory"); \
break; \
case 4: \
asm volatile ("ldar %w0, %1" \
- : "=r" (___p1) : "Q" (*p) : "memory"); \
+ : "=r" (*(__u32 *)__u.__c) \
+ : "Q" (*p) : "memory"); \
break; \
case 8: \
asm volatile ("ldar %0, %1" \
- : "=r" (___p1) : "Q" (*p) : "memory"); \
+ : "=r" (*(__u64 *)__u.__c) \
+ : "Q" (*p) : "memory"); \
break; \
} \
- ___p1; \
+ __u.__val; \
})
#define read_barrier_depends() do { } while(0)
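The smp_load_acquire() rewrite above writes the asm result through a correctly sized pointer into a union's byte array and returns the typed member, so the macro also works when *p is a non-scalar type of 1, 2, 4 or 8 bytes. A small sketch of that sized-store trick (the printed values assume a little-endian layout):

#include <stdint.h>
#include <stdio.h>

struct pair { uint16_t a, b; };         /* 4 bytes, like a u32 load */

int main(void)
{
        union { struct pair val; char c[sizeof(struct pair)]; } u;
        uint32_t raw = 0x00020001;      /* what a 4-byte load returns */

        *(uint32_t *)u.c = raw;         /* store via the sized pointer */
        printf("a=%u b=%u\n", u.val.a, u.val.b);
        return 0;
}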
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 7fbed6919b54..eb8432bb82b8 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -23,7 +23,6 @@
*/
#include <linux/types.h>
#include <linux/sched.h>
-#include <linux/ptrace.h>
#define COMPAT_USER_HZ 100
#ifdef __AARCH64EB__
@@ -234,7 +233,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
return (u32)(unsigned long)uptr;
}
-#define compat_user_stack_pointer() (user_stack_pointer(current_pt_regs()))
+#define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current)))
static inline void __user *arch_compat_alloc_user_space(long len)
{
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 11d5bb0fdd54..8f271b83f910 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -29,8 +29,9 @@
#define ARM64_HAS_PAN 4
#define ARM64_HAS_LSE_ATOMICS 5
#define ARM64_WORKAROUND_CAVIUM_23154 6
+#define ARM64_WORKAROUND_834220 7
-#define ARM64_NCAPS 7
+#define ARM64_NCAPS 8
#ifndef __ASSEMBLY__
@@ -46,8 +47,12 @@ enum ftr_type {
#define FTR_STRICT true /* SANITY check strict matching required */
#define FTR_NONSTRICT false /* SANITY check ignored */
+#define FTR_SIGNED true /* Value should be treated as signed */
+#define FTR_UNSIGNED false /* Value should be treated as unsigned */
+
struct arm64_ftr_bits {
- bool strict; /* CPU Sanity check: strict matching required ? */
+ bool sign; /* Value is signed ? */
+ bool strict; /* CPU Sanity check: strict matching required ? */
enum ftr_type type;
u8 shift;
u8 width;
@@ -123,6 +128,18 @@ cpuid_feature_extract_field(u64 features, int field)
return cpuid_feature_extract_field_width(features, field, 4);
}
+static inline unsigned int __attribute_const__
+cpuid_feature_extract_unsigned_field_width(u64 features, int field, int width)
+{
+ return (u64)(features << (64 - width - field)) >> (64 - width);
+}
+
+static inline unsigned int __attribute_const__
+cpuid_feature_extract_unsigned_field(u64 features, int field)
+{
+ return cpuid_feature_extract_unsigned_field_width(features, field, 4);
+}
+
static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp)
{
return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
@@ -130,7 +147,9 @@ static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp)
static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val)
{
- return cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width);
+ return ftrp->sign ?
+ cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width) :
+ cpuid_feature_extract_unsigned_field_width(val, ftrp->shift, ftrp->width);
}
static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
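The new unsigned extractors mirror the signed ones but use logical rather than arithmetic shifts, so a 4-bit field of all ones reads as 15 rather than -1; arm64_ftr_value() then picks the right variant per feature. A standalone sketch of both extractions:

#include <stdint.h>
#include <stdio.h>

static int64_t extract_signed(uint64_t features, int field, int width)
{
	/* arithmetic right shift sign-extends the field */
	return (int64_t)(features << (64 - width - field)) >> (64 - width);
}

static uint64_t extract_unsigned(uint64_t features, int field, int width)
{
	/* logical right shift zero-extends the field */
	return (features << (64 - width - field)) >> (64 - width);
}

int main(void)
{
	uint64_t reg = 0xf0;	/* 4-bit field 0xf at bit 4 */

	/* prints "-1 15": same bits, different interpretation */
	printf("%lld %llu\n",
	       (long long)extract_signed(reg, 4, 4),
	       (unsigned long long)extract_unsigned(reg, 4, 4));
	return 0;
}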
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 54d0ead41afc..61e08f360e31 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -18,7 +18,6 @@
#ifdef __KERNEL__
-#include <linux/acpi.h>
#include <linux/types.h>
#include <linux/vmalloc.h>
@@ -26,22 +25,16 @@
#include <asm/xen/hypervisor.h>
#define DMA_ERROR_CODE (~(dma_addr_t)0)
-extern struct dma_map_ops *dma_ops;
extern struct dma_map_ops dummy_dma_ops;
static inline struct dma_map_ops *__generic_dma_ops(struct device *dev)
{
- if (unlikely(!dev))
- return dma_ops;
- else if (dev->archdata.dma_ops)
+ if (dev && dev->archdata.dma_ops)
return dev->archdata.dma_ops;
- else if (acpi_disabled)
- return dma_ops;
/*
- * When ACPI is enabled, if arch_set_dma_ops is not called,
- * we will disable device DMA capability by setting it
- * to dummy_dma_ops.
+ * We expect no ISA devices, and all other DMA masters are expected to
+ * have someone call arch_setup_dma_ops at device creation time.
*/
return &dummy_dma_ops;
}
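With the global dma_ops pointer gone, the fallback chain above reduces to: per-device ops if installed, otherwise the dummy ops that reject DMA, since every real DMA master is expected to get arch_setup_dma_ops() at device creation. A stand-in model of that selection logic (the struct types here are placeholders, not the kernel's):

#include <stdio.h>

struct dma_map_ops { const char *name; };

static struct dma_map_ops dummy_dma_ops = { "dummy" };

struct device { struct dma_map_ops *dma_ops; };

static struct dma_map_ops *generic_dma_ops(struct device *dev)
{
	if (dev && dev->dma_ops)
		return dev->dma_ops;	/* per-device ops win */
	return &dummy_dma_ops;		/* otherwise: no DMA */
}

int main(void)
{
	struct device d = { 0 };

	printf("%s\n", generic_dma_ops(&d)->name);	/* dummy */
	return 0;
}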
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index e54415ec6935..9732908bfc8a 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -138,16 +138,18 @@ extern struct pmu perf_ops_bp;
/* Determine number of BRP registers available. */
static inline int get_num_brps(void)
{
+ u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1);
return 1 +
- cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+ cpuid_feature_extract_unsigned_field(dfr0,
ID_AA64DFR0_BRPS_SHIFT);
}
/* Determine number of WRP registers available. */
static inline int get_num_wrps(void)
{
+ u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1);
return 1 +
- cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+ cpuid_feature_extract_unsigned_field(dfr0,
ID_AA64DFR0_WRPS_SHIFT);
}
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 23eb450b820b..8e8d30684392 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -7,4 +7,9 @@ struct pt_regs;
extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
+static inline int nr_legacy_irqs(void)
+{
+ return 0;
+}
+
#endif
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 17e92f05b1fe..25a40213bd9b 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -99,12 +99,22 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
*vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT;
}
-static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num)
+/*
+ * vcpu_get_reg and vcpu_set_reg should always be passed a register number
+ * coming from a read of ESR_EL2. Otherwise, it may give the wrong result on
+ * AArch32 with banked registers.
+ */
+static inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu,
+ u8 reg_num)
{
- if (vcpu_mode_is_32bit(vcpu))
- return vcpu_reg32(vcpu, reg_num);
+ return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num];
+}
- return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num];
+static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
+ unsigned long val)
+{
+ if (reg_num != 31)
+ vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val;
}
/* Get vcpu SPSR for current mode */
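The new accessors encode the A64 convention that register number 31, as found in ESR_EL2 encodings, is the zero register: reads yield 0 and writes are discarded, instead of handing callers a pointer they might dereference for XZR. A userspace model of the same convention:

#include <stdint.h>
#include <stdio.h>

struct fake_vcpu { uint64_t regs[31]; };	/* x0..x30 */

static uint64_t get_reg(const struct fake_vcpu *v, unsigned int reg)
{
	return (reg == 31) ? 0 : v->regs[reg];	/* 31 reads as zero */
}

static void set_reg(struct fake_vcpu *v, unsigned int reg, uint64_t val)
{
	if (reg != 31)				/* writes to 31 vanish */
		v->regs[reg] = val;
}

int main(void)
{
	struct fake_vcpu v = { { 0 } };

	set_reg(&v, 31, 0xdead);		/* silently discarded */
	printf("%llu\n", (unsigned long long)get_reg(&v, 31));	/* 0 */
	return 0;
}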
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index c0e87898ba96..24165784b803 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -101,7 +101,7 @@ static inline void cpu_set_default_tcr_t0sz(void)
#define destroy_context(mm) do { } while(0)
void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
-#define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; })
+#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; })
/*
* This is called when "tsk" is about to enter lazy TLB mode.
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 9819a9426b69..63f52b55defe 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -81,6 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
@@ -275,10 +276,14 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
* hardware updates of the pte (ptep_set_access_flags safely changes
* valid ptes without going through an invalid entry).
*/
- if (IS_ENABLED(CONFIG_DEBUG_VM) && IS_ENABLED(CONFIG_ARM64_HW_AFDBM) &&
- pte_valid(*ptep)) {
- BUG_ON(!pte_young(pte));
- BUG_ON(pte_write(*ptep) && !pte_dirty(pte));
+ if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) &&
+ pte_valid(*ptep) && pte_valid(pte)) {
+ VM_WARN_ONCE(!pte_young(pte),
+ "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
+ __func__, pte_val(*ptep), pte_val(pte));
+ VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(pte),
+ "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
+ __func__, pte_val(*ptep), pte_val(pte));
}
set_pte(ptep, pte);
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 24926f2504f7..feb6b4efa641 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -75,6 +75,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
(1 << MIDR_VARIANT_SHIFT) | 2),
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_834220
+ {
+ /* Cortex-A57 r0p0 - r1p2 */
+ .desc = "ARM erratum 834220",
+ .capability = ARM64_WORKAROUND_834220,
+ MIDR_RANGE(MIDR_CORTEX_A57, 0x00,
+ (1 << MIDR_VARIANT_SHIFT) | 2),
+ },
+#endif
#ifdef CONFIG_ARM64_ERRATUM_845719
{
/* Cortex-A53 r0p[01234] */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c8cf89223b5a..0669c63281ea 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -44,8 +44,9 @@ unsigned int compat_elf_hwcap2 __read_mostly;
DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
-#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+#define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
{ \
+ .sign = SIGNED, \
.strict = STRICT, \
.type = TYPE, \
.shift = SHIFT, \
@@ -53,6 +54,14 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
.safe_val = SAFE_VAL, \
}
+/* Define a feature with signed values */
+#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+ __ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
+
+/* Define a feature with unsigned value */
+#define U_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+ __ARM64_FTR_BITS(FTR_UNSIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
+
#define ARM64_FTR_END \
{ \
.width = 0, \
@@ -99,7 +108,7 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
* Differing PARange is fine as long as all peripherals and memory are mapped
* within the minimum PARange of all CPUs
*/
- ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
+ U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -115,18 +124,18 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
};
static struct arm64_ftr_bits ftr_ctr[] = {
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */
+ U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
+ U_ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
+ U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */
+ U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
/*
* Linux can handle differing I-cache policies. Userspace JITs will
* make use of *minLine
*/
- ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */
+ U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */
+ U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */
ARM64_FTR_END,
};
@@ -144,12 +153,12 @@ static struct arm64_ftr_bits ftr_id_mmfr0[] = {
static struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
+ U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
+ U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
+ U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
+ U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
+ U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
+ U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
ARM64_FTR_END,
};
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 706679d0a0b4..212ae6361d8b 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -30,6 +30,7 @@
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/smp.h>
+#include <linux/delay.h>
/*
* In case the boot CPU is hotpluggable, we record its initial state and
@@ -112,6 +113,10 @@ static int c_show(struct seq_file *m, void *v)
*/
seq_printf(m, "processor\t: %d\n", i);
+ seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
+ loops_per_jiffy / (500000UL/HZ),
+ loops_per_jiffy / (5000UL/HZ) % 100);
+
/*
* Dump out the common processor features in a single line.
* Userspace should read the hwcaps with getauxval(AT_HWCAP)
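The restored BogoMIPS line derives the integer and two-digit fractional parts from loops_per_jiffy with pure integer arithmetic. A worked standalone example, assuming HZ=100 and loops_per_jiffy=49152 (it prints 9.83):

#include <stdio.h>

int main(void)
{
	unsigned long hz = 100, loops_per_jiffy = 49152;

	/* 49152/5000 = 9 (integer part); 49152/50 % 100 = 83 */
	printf("BogoMIPS\t: %lu.%02lu\n",
	       loops_per_jiffy / (500000UL / hz),
	       loops_per_jiffy / (5000UL / hz) % 100);
	return 0;
}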
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index de46b50f4cdf..4eeb17198cfa 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -127,7 +127,11 @@ static int __init uefi_init(void)
table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables;
config_tables = early_memremap(efi_to_phys(efi.systab->tables),
table_size);
-
+ if (config_tables == NULL) {
+ pr_warn("Unable to map EFI config table array.\n");
+ retval = -ENOMEM;
+ goto out;
+ }
retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables,
sizeof(efi_config_table_64_t), NULL);
@@ -209,6 +213,14 @@ void __init efi_init(void)
PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK)));
memmap.phys_map = params.mmap;
memmap.map = early_memremap(params.mmap, params.mmap_size);
+ if (memmap.map == NULL) {
+ /*
+ * If we are booting via UEFI, the UEFI memory map is the only
+ * description of memory we have, so there is little point in
+ * proceeding if we cannot access it.
+ */
+ panic("Unable to map EFI memory map.\n");
+ }
memmap.map_end = memmap.map + params.mmap_size;
memmap.desc_size = params.desc_size;
memmap.desc_version = params.desc_ver;
@@ -224,8 +236,9 @@ static bool __init efi_virtmap_init(void)
{
efi_memory_desc_t *md;
+ init_new_context(NULL, &efi_mm);
+
for_each_efi_memory_desc(&memmap, md) {
- u64 paddr, npages, size;
pgprot_t prot;
if (!(md->attribute & EFI_MEMORY_RUNTIME))
@@ -233,11 +246,6 @@ static bool __init efi_virtmap_init(void)
if (md->virt_addr == 0)
return false;
- paddr = md->phys_addr;
- npages = md->num_pages;
- memrange_efi_to_native(&paddr, &npages);
- size = npages << PAGE_SHIFT;
-
pr_info(" EFI remap 0x%016llx => %p\n",
md->phys_addr, (void *)md->virt_addr);
@@ -254,7 +262,9 @@ static bool __init efi_virtmap_init(void)
else
prot = PAGE_KERNEL;
- create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot);
+ create_pgd_mapping(&efi_mm, md->phys_addr, md->virt_addr,
+ md->num_pages << EFI_PAGE_SHIFT,
+ __pgprot(pgprot_val(prot) | PTE_NG));
}
return true;
}
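The size computation above deliberately uses EFI_PAGE_SHIFT rather than the kernel's PAGE_SHIFT: UEFI memory descriptors always count 4KiB pages, even when the kernel runs with 64KiB pages, so the byte length of a region is num_pages << 12. Trivially checked standalone:

#include <stdint.h>
#include <stdio.h>

#define EFI_PAGE_SHIFT	12	/* UEFI pages are always 4KiB */

int main(void)
{
	uint64_t num_pages = 16;

	printf("%llu bytes\n",
	       (unsigned long long)(num_pages << EFI_PAGE_SHIFT)); /* 65536 */
	return 0;
}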
@@ -270,12 +280,12 @@ static int __init arm64_enable_runtime_services(void)
if (!efi_enabled(EFI_BOOT)) {
pr_info("EFI services will not be available.\n");
- return -1;
+ return 0;
}
if (efi_runtime_disabled()) {
pr_info("EFI runtime services will be disabled.\n");
- return -1;
+ return 0;
}
pr_info("Remapping and enabling EFI services.\n");
@@ -285,7 +295,7 @@ static int __init arm64_enable_runtime_services(void)
mapsize);
if (!memmap.map) {
pr_err("Failed to remap EFI memory map\n");
- return -1;
+ return -ENOMEM;
}
memmap.map_end = memmap.map + mapsize;
efi.memmap = &memmap;
@@ -294,13 +304,13 @@ static int __init arm64_enable_runtime_services(void)
sizeof(efi_system_table_t));
if (!efi.systab) {
pr_err("Failed to remap EFI System Table\n");
- return -1;
+ return -ENOMEM;
}
set_bit(EFI_SYSTEM_TABLES, &efi.flags);
if (!efi_virtmap_init()) {
pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n");
- return -1;
+ return -ENOMEM;
}
/* Set up runtime services function pointers */
@@ -329,14 +339,7 @@ core_initcall(arm64_dmi_init);
static void efi_set_pgd(struct mm_struct *mm)
{
- if (mm == &init_mm)
- cpu_set_reserved_ttbr0();
- else
- cpu_switch_mm(mm->pgd, mm);
-
- local_flush_tlb_all();
- if (icache_is_aivivt())
- __local_flush_icache_all();
+ switch_mm(NULL, mm, NULL);
}
void efi_virtmap_load(void)
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index fce95e17cf7f..1095aa483a1c 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -1,3 +1,4 @@
+#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
@@ -71,6 +72,13 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
local_dbg_save(flags);
/*
+	 * Function graph tracer state gets inconsistent when the kernel
+	 * calls functions that never return (aka suspend finishers), hence
+	 * disable graph tracing during their execution.
+ */
+ pause_graph_tracing();
+
+ /*
* mm context saved on the stack, it will be restored when
* the cpu comes out of reset through the identity mapped
* page tables, so that the thread address space is properly
@@ -111,6 +119,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
hw_breakpoint_restore(NULL);
}
+ unpause_graph_tracing();
+
/*
* Restore pstate flags. OS lock and mdscr have been already
* restored, so from this point onwards, debugging is fully
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 1ee2c3937d4e..71426a78db12 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -5,6 +5,7 @@
*/
#include <asm-generic/vmlinux.lds.h>
+#include <asm/cache.h>
#include <asm/kernel-pgtable.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
@@ -140,7 +141,7 @@ SECTIONS
ARM_EXIT_KEEP(EXIT_DATA)
}
- PERCPU_SECTION(64)
+ PERCPU_SECTION(L1_CACHE_BYTES)
. = ALIGN(PAGE_SIZE);
__init_end = .;
@@ -158,7 +159,7 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
_data = .;
_sdata = .;
- RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
PECOFF_EDATA_PADDING
_edata = .;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 68a0759b1375..15f0477b0d2a 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -37,7 +37,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int ret;
- trace_kvm_hvc_arm64(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
+ trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
kvm_vcpu_hvc_get_imm(vcpu));
ret = kvm_psci_call(vcpu);
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 1599701ef044..86c289832272 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -864,6 +864,10 @@ ENTRY(__kvm_flush_vm_context)
ENDPROC(__kvm_flush_vm_context)
__kvm_hyp_panic:
+ // Stash PAR_EL1 before corrupting it in __restore_sysregs
+ mrs x0, par_el1
+ push x0, xzr
+
// Guess the context by looking at VTTBR:
// If zero, then we're already a host.
	// Otherwise restore a minimal host context before panicking.
@@ -898,7 +902,7 @@ __kvm_hyp_panic:
mrs x3, esr_el2
mrs x4, far_el2
mrs x5, hpfar_el2
- mrs x6, par_el1
+ pop x6, xzr // active context PAR_EL1
mrs x7, tpidr_el2
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
@@ -914,7 +918,7 @@ __kvm_hyp_panic:
ENDPROC(__kvm_hyp_panic)
__hyp_panic_str:
- .ascii "HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0"
+ .ascii "HYP panic:\nPS:%08x PC:%016x ESR:%08x\nFAR:%016x HPFAR:%016x PAR:%016x\nVCPU:%p\n\0"
.align 2
@@ -1015,9 +1019,15 @@ el1_trap:
b.ne 1f // Not an abort we care about
/* This is an abort. Check for permission fault */
+alternative_if_not ARM64_WORKAROUND_834220
and x2, x1, #ESR_ELx_FSC_TYPE
cmp x2, #FSC_PERM
b.ne 1f // Not a permission fault
+alternative_else
+ nop // Use the permission fault path to
+ nop // check for a valid S1 translation,
+ nop // regardless of the ESR value.
+alternative_endif
/*
* Check for Stage-1 page table walk, which is guaranteed
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 85c57158dcd9..648112e90ed5 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -48,7 +48,7 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
/* Note: These now point to the banked copies */
*vcpu_spsr(vcpu) = new_spsr_value;
- *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
+ *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
/* Branch to exception vector */
if (sctlr & (1 << 13))
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 87a64e8db04c..d2650e84faf2 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -78,7 +78,7 @@ static u32 get_ccsidr(u32 csselr)
* See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
*/
static bool access_dcsw(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (!p->is_write)
@@ -94,21 +94,19 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
* sys_regs and leave it in complete control of the caches.
*/
static bool access_vm_reg(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- unsigned long val;
bool was_enabled = vcpu_has_cache_enabled(vcpu);
BUG_ON(!p->is_write);
- val = *vcpu_reg(vcpu, p->Rt);
if (!p->is_aarch32) {
- vcpu_sys_reg(vcpu, r->reg) = val;
+ vcpu_sys_reg(vcpu, r->reg) = p->regval;
} else {
if (!p->is_32bit)
- vcpu_cp15_64_high(vcpu, r->reg) = val >> 32;
- vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
+ vcpu_cp15_64_high(vcpu, r->reg) = upper_32_bits(p->regval);
+ vcpu_cp15_64_low(vcpu, r->reg) = lower_32_bits(p->regval);
}
kvm_toggle_cache(vcpu, was_enabled);
@@ -122,22 +120,19 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
* for both AArch64 and AArch32 accesses.
*/
static bool access_gic_sgi(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- u64 val;
-
if (!p->is_write)
return read_from_write_only(vcpu, p);
- val = *vcpu_reg(vcpu, p->Rt);
- vgic_v3_dispatch_sgi(vcpu, val);
+ vgic_v3_dispatch_sgi(vcpu, p->regval);
return true;
}
static bool trap_raz_wi(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write)
@@ -147,19 +142,19 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
}
static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write) {
return ignore_write(vcpu, p);
} else {
- *vcpu_reg(vcpu, p->Rt) = (1 << 3);
+ p->regval = (1 << 3);
return true;
}
}
static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write) {
@@ -167,7 +162,7 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
} else {
u32 val;
asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val));
- *vcpu_reg(vcpu, p->Rt) = val;
+ p->regval = val;
return true;
}
}
@@ -200,17 +195,17 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
* now use the debug registers.
*/
static bool trap_debug_regs(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write) {
- vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+ vcpu_sys_reg(vcpu, r->reg) = p->regval;
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
} else {
- *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
+ p->regval = vcpu_sys_reg(vcpu, r->reg);
}
- trace_trap_reg(__func__, r->reg, p->is_write, *vcpu_reg(vcpu, p->Rt));
+ trace_trap_reg(__func__, r->reg, p->is_write, p->regval);
return true;
}
@@ -225,10 +220,10 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
* hyp.S code switches between host and guest values in future.
*/
static inline void reg_to_dbg(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
u64 *dbg_reg)
{
- u64 val = *vcpu_reg(vcpu, p->Rt);
+ u64 val = p->regval;
if (p->is_32bit) {
val &= 0xffffffffUL;
@@ -240,19 +235,16 @@ static inline void reg_to_dbg(struct kvm_vcpu *vcpu,
}
static inline void dbg_to_reg(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
u64 *dbg_reg)
{
- u64 val = *dbg_reg;
-
+ p->regval = *dbg_reg;
if (p->is_32bit)
- val &= 0xffffffffUL;
-
- *vcpu_reg(vcpu, p->Rt) = val;
+ p->regval &= 0xffffffffUL;
}
static inline bool trap_bvr(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
@@ -294,7 +286,7 @@ static inline void reset_bvr(struct kvm_vcpu *vcpu,
}
static inline bool trap_bcr(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
@@ -337,7 +329,7 @@ static inline void reset_bcr(struct kvm_vcpu *vcpu,
}
static inline bool trap_wvr(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
@@ -380,7 +372,7 @@ static inline void reset_wvr(struct kvm_vcpu *vcpu,
}
static inline bool trap_wcr(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
@@ -687,7 +679,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
};
static bool trap_dbgidr(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write) {
@@ -697,23 +689,23 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1);
u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT);
- *vcpu_reg(vcpu, p->Rt) = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
- (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
- (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) |
- (6 << 16) | (el3 << 14) | (el3 << 12));
+ p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
+ (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
+			     (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) |
+			     (6 << 16) | (el3 << 14) | (el3 << 12));
return true;
}
}
static bool trap_debug32(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write) {
- vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+ vcpu_cp14(vcpu, r->reg) = p->regval;
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
} else {
- *vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg);
+ p->regval = vcpu_cp14(vcpu, r->reg);
}
return true;
@@ -731,7 +723,7 @@ static bool trap_debug32(struct kvm_vcpu *vcpu,
*/
static inline bool trap_xvr(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
@@ -740,12 +732,12 @@ static inline bool trap_xvr(struct kvm_vcpu *vcpu,
u64 val = *dbg_reg;
val &= 0xffffffffUL;
- val |= *vcpu_reg(vcpu, p->Rt) << 32;
+ val |= p->regval << 32;
*dbg_reg = val;
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
} else {
- *vcpu_reg(vcpu, p->Rt) = *dbg_reg >> 32;
+ p->regval = *dbg_reg >> 32;
}
trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
@@ -991,7 +983,7 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
* Return 0 if the access has been handled, and -1 if not.
*/
static int emulate_cp(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *params,
+ struct sys_reg_params *params,
const struct sys_reg_desc *table,
size_t num)
{
@@ -1062,12 +1054,12 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
{
struct sys_reg_params params;
u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ int Rt = (hsr >> 5) & 0xf;
int Rt2 = (hsr >> 10) & 0xf;
params.is_aarch32 = true;
params.is_32bit = false;
params.CRm = (hsr >> 1) & 0xf;
- params.Rt = (hsr >> 5) & 0xf;
params.is_write = ((hsr & 1) == 0);
params.Op0 = 0;
@@ -1076,15 +1068,12 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
params.CRn = 0;
/*
- * Massive hack here. Store Rt2 in the top 32bits so we only
- * have one register to deal with. As we use the same trap
+ * Make a 64-bit value out of Rt and Rt2. As we use the same trap
* backends between AArch32 and AArch64, we get away with it.
*/
if (params.is_write) {
- u64 val = *vcpu_reg(vcpu, params.Rt);
- val &= 0xffffffff;
- val |= *vcpu_reg(vcpu, Rt2) << 32;
- *vcpu_reg(vcpu, params.Rt) = val;
+ params.regval = vcpu_get_reg(vcpu, Rt) & 0xffffffff;
+ params.regval |= vcpu_get_reg(vcpu, Rt2) << 32;
}
if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
@@ -1095,11 +1084,10 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
unhandled_cp_access(vcpu, &params);
out:
- /* Do the opposite hack for the read side */
+ /* Split up the value between registers for the read side */
if (!params.is_write) {
- u64 val = *vcpu_reg(vcpu, params.Rt);
- val >>= 32;
- *vcpu_reg(vcpu, Rt2) = val;
+ vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval));
+ vcpu_set_reg(vcpu, Rt2, upper_32_bits(params.regval));
}
return 1;
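The regval packing above replaces the old in-place trick: for a 64-bit coprocessor access, Rt supplies the low word and Rt2 the high word, and the read path splits the result back out with lower_32_bits()/upper_32_bits(). A self-contained sketch of just the bit manipulation (the helper names here are illustrative):

#include <stdint.h>
#include <stdio.h>

static uint64_t pack(uint32_t rt, uint32_t rt2)
{
	return ((uint64_t)rt2 << 32) | rt;	/* Rt low, Rt2 high */
}

static void unpack(uint64_t regval, uint32_t *rt, uint32_t *rt2)
{
	*rt  = (uint32_t)regval;		/* lower_32_bits() */
	*rt2 = (uint32_t)(regval >> 32);	/* upper_32_bits() */
}

int main(void)
{
	uint32_t rt, rt2;

	unpack(pack(0x11111111, 0x22222222), &rt, &rt2);
	printf("%08x %08x\n", rt, rt2);		/* 11111111 22222222 */
	return 0;
}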
@@ -1118,21 +1106,24 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
{
struct sys_reg_params params;
u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ int Rt = (hsr >> 5) & 0xf;
params.is_aarch32 = true;
params.is_32bit = true;
params.CRm = (hsr >> 1) & 0xf;
- params.Rt = (hsr >> 5) & 0xf;
+ params.regval = vcpu_get_reg(vcpu, Rt);
params.is_write = ((hsr & 1) == 0);
params.CRn = (hsr >> 10) & 0xf;
params.Op0 = 0;
params.Op1 = (hsr >> 14) & 0x7;
params.Op2 = (hsr >> 17) & 0x7;
- if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
- return 1;
- if (!emulate_cp(vcpu, &params, global, nr_global))
+ if (!emulate_cp(vcpu, &params, target_specific, nr_specific) ||
+ !emulate_cp(vcpu, &params, global, nr_global)) {
+ if (!params.is_write)
+ vcpu_set_reg(vcpu, Rt, params.regval);
return 1;
+ }
unhandled_cp_access(vcpu, &params);
return 1;
@@ -1175,7 +1166,7 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
}
static int emulate_sys_reg(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *params)
+ struct sys_reg_params *params)
{
size_t num;
const struct sys_reg_desc *table, *r;
@@ -1230,6 +1221,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct sys_reg_params params;
unsigned long esr = kvm_vcpu_get_hsr(vcpu);
+ int Rt = (esr >> 5) & 0x1f;
+ int ret;
trace_kvm_handle_sys_reg(esr);
@@ -1240,10 +1233,14 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
params.CRn = (esr >> 10) & 0xf;
params.CRm = (esr >> 1) & 0xf;
params.Op2 = (esr >> 17) & 0x7;
- params.Rt = (esr >> 5) & 0x1f;
+ params.regval = vcpu_get_reg(vcpu, Rt);
params.is_write = !(esr & 1);
- return emulate_sys_reg(vcpu, &params);
+ ret = emulate_sys_reg(vcpu, &params);
+
+ if (!params.is_write)
+ vcpu_set_reg(vcpu, Rt, params.regval);
+ return ret;
}
/******************************************************************************
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index eaa324e4db4d..dbbb01cfbee9 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -28,7 +28,7 @@ struct sys_reg_params {
u8 CRn;
u8 CRm;
u8 Op2;
- u8 Rt;
+ u64 regval;
bool is_write;
bool is_aarch32;
bool is_32bit; /* Only valid if is_aarch32 is true */
@@ -44,7 +44,7 @@ struct sys_reg_desc {
/* Trapped access from guest, if non-NULL. */
bool (*access)(struct kvm_vcpu *,
- const struct sys_reg_params *,
+ struct sys_reg_params *,
const struct sys_reg_desc *);
/* Initialization for vcpu. */
@@ -77,9 +77,9 @@ static inline bool ignore_write(struct kvm_vcpu *vcpu,
}
static inline bool read_zero(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p)
+ struct sys_reg_params *p)
{
- *vcpu_reg(vcpu, p->Rt) = 0;
+ p->regval = 0;
return true;
}
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 1e4576824165..ed90578fa120 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -31,13 +31,13 @@
#include "sys_regs.h"
static bool access_actlr(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
+ struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write)
return ignore_write(vcpu, p);
- *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, ACTLR_EL1);
+ p->regval = vcpu_sys_reg(vcpu, ACTLR_EL1);
return true;
}
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index f636a2639f03..e87f53ff5f58 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -76,13 +76,28 @@ static void flush_context(unsigned int cpu)
__flush_icache_all();
}
-static int is_reserved_asid(u64 asid)
+static bool check_update_reserved_asid(u64 asid, u64 newasid)
{
int cpu;
- for_each_possible_cpu(cpu)
- if (per_cpu(reserved_asids, cpu) == asid)
- return 1;
- return 0;
+ bool hit = false;
+
+ /*
+ * Iterate over the set of reserved ASIDs looking for a match.
+ * If we find one, then we can update our mm to use newasid
+ * (i.e. the same ASID in the current generation) but we can't
+ * exit the loop early, since we need to ensure that all copies
+ * of the old ASID are updated to reflect the mm. Failure to do
+ * so could result in us missing the reserved ASID in a future
+ * generation.
+ */
+ for_each_possible_cpu(cpu) {
+ if (per_cpu(reserved_asids, cpu) == asid) {
+ hit = true;
+ per_cpu(reserved_asids, cpu) = newasid;
+ }
+ }
+
+ return hit;
}
static u64 new_context(struct mm_struct *mm, unsigned int cpu)
@@ -92,12 +107,14 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
u64 generation = atomic64_read(&asid_generation);
if (asid != 0) {
+ u64 newasid = generation | (asid & ~ASID_MASK);
+
/*
* If our current ASID was active during a rollover, we
* can continue to use it and this was just a false alarm.
*/
- if (is_reserved_asid(asid))
- return generation | (asid & ~ASID_MASK);
+ if (check_update_reserved_asid(asid, newasid))
+ return newasid;
/*
* We had a valid ASID in a previous life, so try to re-use
@@ -105,7 +122,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
*/
asid &= ~ASID_MASK;
if (!__test_and_set_bit(asid, asid_map))
- goto bump_gen;
+ return newasid;
}
/*
@@ -129,10 +146,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
set_asid:
__set_bit(asid, asid_map);
cur_idx = asid;
-
-bump_gen:
- asid |= generation;
- return asid;
+ return asid | generation;
}
void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
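The loop in check_update_reserved_asid() intentionally has no early exit, as its comment explains: every per-CPU copy of the old reserved ASID must be rewritten to the new generation's value, or a later rollover could miss it. A userspace model with plain arrays standing in for the per-CPU variables:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

static uint64_t reserved_asids[NR_CPUS];

static bool check_update_reserved(uint64_t asid, uint64_t newasid)
{
	bool hit = false;
	int cpu;

	/* No early exit: every stale copy must move to newasid. */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (reserved_asids[cpu] == asid) {
			hit = true;
			reserved_asids[cpu] = newasid;
		}
	}
	return hit;
}

int main(void)
{
	reserved_asids[1] = reserved_asids[3] = 42;

	printf("%d\n", check_update_reserved(42, 0x100 | 42));	  /* 1 */
	printf("%llx\n", (unsigned long long)reserved_asids[3]); /* 12a */
	return 0;
}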
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 131a199114b4..7963aa4b5d28 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -18,6 +18,7 @@
*/
#include <linux/gfp.h>
+#include <linux/acpi.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
@@ -28,9 +29,6 @@
#include <asm/cacheflush.h>
-struct dma_map_ops *dma_ops;
-EXPORT_SYMBOL(dma_ops);
-
static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
bool coherent)
{
@@ -515,13 +513,7 @@ EXPORT_SYMBOL(dummy_dma_ops);
static int __init arm64_dma_init(void)
{
- int ret;
-
- dma_ops = &swiotlb_dma_ops;
-
- ret = atomic_pool_init();
-
- return ret;
+ return atomic_pool_init();
}
arch_initcall(arm64_dma_init);
@@ -552,10 +544,14 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
{
bool coherent = is_device_dma_coherent(dev);
int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
+ size_t iosize = size;
void *addr;
if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
return NULL;
+
+ size = PAGE_ALIGN(size);
+
/*
* Some drivers rely on this, and we probably don't want the
* possibility of stale kernel data being read by devices anyway.
@@ -566,7 +562,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
struct page **pages;
pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
- pages = iommu_dma_alloc(dev, size, gfp, ioprot, handle,
+ pages = iommu_dma_alloc(dev, iosize, gfp, ioprot, handle,
flush_page);
if (!pages)
return NULL;
@@ -574,7 +570,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
__builtin_return_address(0));
if (!addr)
- iommu_dma_free(dev, pages, size, handle);
+ iommu_dma_free(dev, pages, iosize, handle);
} else {
struct page *page;
/*
@@ -591,7 +587,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
if (!addr)
return NULL;
- *handle = iommu_dma_map_page(dev, page, 0, size, ioprot);
+ *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
if (iommu_dma_mapping_error(dev, *handle)) {
if (coherent)
__free_pages(page, get_order(size));
@@ -606,6 +602,9 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t handle, struct dma_attrs *attrs)
{
+ size_t iosize = size;
+
+ size = PAGE_ALIGN(size);
/*
* @cpu_addr will be one of 3 things depending on how it was allocated:
* - A remapped array of pages from iommu_dma_alloc(), for all
@@ -617,17 +616,17 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
* Hence how dodgy the below logic looks...
*/
if (__in_atomic_pool(cpu_addr, size)) {
- iommu_dma_unmap_page(dev, handle, size, 0, NULL);
+ iommu_dma_unmap_page(dev, handle, iosize, 0, NULL);
__free_from_pool(cpu_addr, size);
} else if (is_vmalloc_addr(cpu_addr)){
struct vm_struct *area = find_vm_area(cpu_addr);
if (WARN_ON(!area || !area->pages))
return;
- iommu_dma_free(dev, area->pages, size, &handle);
+ iommu_dma_free(dev, area->pages, iosize, &handle);
dma_common_free_remap(cpu_addr, size, VM_USERMAP);
} else {
- iommu_dma_unmap_page(dev, handle, size, 0, NULL);
+ iommu_dma_unmap_page(dev, handle, iosize, 0, NULL);
__free_pages(virt_to_page(cpu_addr), get_order(size));
}
}
@@ -984,8 +983,8 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
struct iommu_ops *iommu, bool coherent)
{
- if (!acpi_disabled && !dev->archdata.dma_ops)
- dev->archdata.dma_ops = dma_ops;
+ if (!dev->archdata.dma_ops)
+ dev->archdata.dma_ops = &swiotlb_dma_ops;
dev->archdata.dma_coherent = coherent;
__iommu_setup_dma_ops(dev, dma_base, size, iommu);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 19211c4a8911..92ddac1e8ca2 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -393,16 +393,16 @@ static struct fault_info {
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },
{ do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
- { do_bad, SIGBUS, 0, "reserved access flag fault" },
+ { do_bad, SIGBUS, 0, "unknown 8" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" },
- { do_bad, SIGBUS, 0, "reserved permission fault" },
+ { do_bad, SIGBUS, 0, "unknown 12" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
{ do_bad, SIGBUS, 0, "synchronous external abort" },
- { do_bad, SIGBUS, 0, "asynchronous external abort" },
+ { do_bad, SIGBUS, 0, "unknown 17" },
{ do_bad, SIGBUS, 0, "unknown 18" },
{ do_bad, SIGBUS, 0, "unknown 19" },
{ do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
@@ -410,16 +410,16 @@ static struct fault_info {
{ do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error" },
- { do_bad, SIGBUS, 0, "asynchronous parity error" },
+ { do_bad, SIGBUS, 0, "unknown 25" },
{ do_bad, SIGBUS, 0, "unknown 26" },
{ do_bad, SIGBUS, 0, "unknown 27" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" },
+ { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "unknown 32" },
{ do_bad, SIGBUS, BUS_ADRALN, "alignment fault" },
- { do_bad, SIGBUS, 0, "debug event" },
+ { do_bad, SIGBUS, 0, "unknown 34" },
{ do_bad, SIGBUS, 0, "unknown 35" },
{ do_bad, SIGBUS, 0, "unknown 36" },
{ do_bad, SIGBUS, 0, "unknown 37" },
@@ -433,21 +433,21 @@ static struct fault_info {
{ do_bad, SIGBUS, 0, "unknown 45" },
{ do_bad, SIGBUS, 0, "unknown 46" },
{ do_bad, SIGBUS, 0, "unknown 47" },
- { do_bad, SIGBUS, 0, "unknown 48" },
+ { do_bad, SIGBUS, 0, "TLB conflict abort" },
{ do_bad, SIGBUS, 0, "unknown 49" },
{ do_bad, SIGBUS, 0, "unknown 50" },
{ do_bad, SIGBUS, 0, "unknown 51" },
{ do_bad, SIGBUS, 0, "implementation fault (lockdown abort)" },
- { do_bad, SIGBUS, 0, "unknown 53" },
+ { do_bad, SIGBUS, 0, "implementation fault (unsupported exclusive)" },
{ do_bad, SIGBUS, 0, "unknown 54" },
{ do_bad, SIGBUS, 0, "unknown 55" },
{ do_bad, SIGBUS, 0, "unknown 56" },
{ do_bad, SIGBUS, 0, "unknown 57" },
- { do_bad, SIGBUS, 0, "implementation fault (coprocessor abort)" },
+ { do_bad, SIGBUS, 0, "unknown 58" },
{ do_bad, SIGBUS, 0, "unknown 59" },
{ do_bad, SIGBUS, 0, "unknown 60" },
- { do_bad, SIGBUS, 0, "unknown 61" },
- { do_bad, SIGBUS, 0, "unknown 62" },
+ { do_bad, SIGBUS, 0, "section domain fault" },
+ { do_bad, SIGBUS, 0, "page domain fault" },
{ do_bad, SIGBUS, 0, "unknown 63" },
};
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e3f563c81c48..873e363048c6 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -64,8 +64,12 @@ EXPORT_SYMBOL(phys_mem_access_prot);
static void __init *early_alloc(unsigned long sz)
{
- void *ptr = __va(memblock_alloc(sz, sz));
- BUG_ON(!ptr);
+ phys_addr_t phys;
+ void *ptr;
+
+ phys = memblock_alloc(sz, sz);
+ BUG_ON(!phys);
+ ptr = __va(phys);
memset(ptr, 0, sz);
return ptr;
}
@@ -81,55 +85,19 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
do {
/*
* Need to have the least restrictive permissions available
- * permissions will be fixed up later. Default the new page
- * range as contiguous ptes.
+ * permissions will be fixed up later
*/
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC_CONT));
+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
pfn++;
} while (pte++, i++, i < PTRS_PER_PTE);
}
-/*
- * Given a PTE with the CONT bit set, determine where the CONT range
- * starts, and clear the entire range of PTE CONT bits.
- */
-static void clear_cont_pte_range(pte_t *pte, unsigned long addr)
-{
- int i;
-
- pte -= CONT_RANGE_OFFSET(addr);
- for (i = 0; i < CONT_PTES; i++) {
- set_pte(pte, pte_mknoncont(*pte));
- pte++;
- }
- flush_tlb_all();
-}
-
-/*
- * Given a range of PTEs set the pfn and provided page protection flags
- */
-static void __populate_init_pte(pte_t *pte, unsigned long addr,
- unsigned long end, phys_addr_t phys,
- pgprot_t prot)
-{
- unsigned long pfn = __phys_to_pfn(phys);
-
- do {
- /* clear all the bits except the pfn, then apply the prot */
- set_pte(pte, pfn_pte(pfn, prot));
- pte++;
- pfn++;
- addr += PAGE_SIZE;
- } while (addr != end);
-}
-
static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
- unsigned long end, phys_addr_t phys,
+ unsigned long end, unsigned long pfn,
pgprot_t prot,
void *(*alloc)(unsigned long size))
{
pte_t *pte;
- unsigned long next;
if (pmd_none(*pmd) || pmd_sect(*pmd)) {
pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
@@ -142,27 +110,9 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
pte = pte_offset_kernel(pmd, addr);
do {
- next = min(end, (addr + CONT_SIZE) & CONT_MASK);
- if (((addr | next | phys) & ~CONT_MASK) == 0) {
- /* a block of CONT_PTES */
- __populate_init_pte(pte, addr, next, phys,
- __pgprot(pgprot_val(prot) | PTE_CONT));
- } else {
- /*
- * If the range being split is already inside of a
- * contiguous range but this PTE isn't going to be
- * contiguous, then we want to unmark the adjacent
- * ranges, then update the portion of the range we
- * are interrested in.
- */
- clear_cont_pte_range(pte, addr);
- __populate_init_pte(pte, addr, next, phys, prot);
- }
-
- pte += (next - addr) >> PAGE_SHIFT;
- phys += next - addr;
- addr = next;
- } while (addr != end);
+ set_pte(pte, pfn_pte(pfn, prot));
+ pfn++;
+ } while (pte++, addr += PAGE_SIZE, addr != end);
}
static void split_pud(pud_t *old_pud, pmd_t *pmd)
@@ -223,7 +173,8 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
}
}
} else {
- alloc_init_pte(pmd, addr, next, phys, prot, alloc);
+ alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
+ prot, alloc);
}
phys += next - addr;
} while (pmd++, addr = next, addr != end);
@@ -362,8 +313,8 @@ static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
* for now. This will get more fine grained later once all memory
* is mapped
*/
- unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
- unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
+ unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE);
+ unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE);
if (end < kernel_x_start) {
create_mapping(start, __phys_to_virt(start),
@@ -451,18 +402,18 @@ static void __init fixup_executable(void)
{
#ifdef CONFIG_DEBUG_RODATA
/* now that we are actually fully mapped, make the start/end more fine grained */
- if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
+ if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) {
unsigned long aligned_start = round_down(__pa(_stext),
- SECTION_SIZE);
+ SWAPPER_BLOCK_SIZE);
create_mapping(aligned_start, __phys_to_virt(aligned_start),
__pa(_stext) - aligned_start,
PAGE_KERNEL);
}
- if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
+ if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) {
unsigned long aligned_end = round_up(__pa(__init_end),
- SECTION_SIZE);
+ SWAPPER_BLOCK_SIZE);
create_mapping(__pa(__init_end), (unsigned long)__init_end,
aligned_end - __pa(__init_end),
PAGE_KERNEL);
@@ -475,7 +426,7 @@ void mark_rodata_ro(void)
{
create_mapping_late(__pa(_stext), (unsigned long)_stext,
(unsigned long)_etext - (unsigned long)_stext,
- PAGE_KERNEL_EXEC | PTE_RDONLY);
+ PAGE_KERNEL_ROX);
}
#endif
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index cf3c7d4a1b58..b162ad70effc 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -50,7 +50,7 @@ static const int bpf2a64[] = {
[BPF_REG_8] = A64_R(21),
[BPF_REG_9] = A64_R(22),
/* read-only frame pointer to access stack */
- [BPF_REG_FP] = A64_FP,
+ [BPF_REG_FP] = A64_R(25),
/* temporary register for internal BPF JIT */
[TMP_REG_1] = A64_R(23),
[TMP_REG_2] = A64_R(24),
@@ -139,6 +139,12 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
/* Stack must be a multiple of 16B */
#define STACK_ALIGN(sz) (((sz) + 15) & ~15)
+#define _STACK_SIZE \
+ (MAX_BPF_STACK \
+ + 4 /* extra for skb_copy_bits buffer */)
+
+#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
+
static void build_prologue(struct jit_ctx *ctx)
{
const u8 r6 = bpf2a64[BPF_REG_6];
@@ -150,10 +156,35 @@ static void build_prologue(struct jit_ctx *ctx)
const u8 rx = bpf2a64[BPF_REG_X];
const u8 tmp1 = bpf2a64[TMP_REG_1];
const u8 tmp2 = bpf2a64[TMP_REG_2];
- int stack_size = MAX_BPF_STACK;
- stack_size += 4; /* extra for skb_copy_bits buffer */
- stack_size = STACK_ALIGN(stack_size);
+ /*
+ * BPF prog stack layout
+ *
+ * high
+ * original A64_SP => 0:+-----+ BPF prologue
+ * |FP/LR|
+ * current A64_FP => -16:+-----+
+ * | ... | callee saved registers
+ * +-----+
+ * | | x25/x26
+ * BPF fp register => -80:+-----+ <= (BPF_FP)
+ * | |
+ * | ... | BPF prog stack
+ * | |
+ * +-----+ <= (BPF_FP - MAX_BPF_STACK)
+ * |RSVD | JIT scratchpad
+ * current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE)
+ * | |
+ * | ... | Function call stack
+ * | |
+ * +-----+
+ * low
+ *
+ */
+
+	/* Save FP and LR registers to stay aligned with the ARM64 AAPCS */
+ emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
+ emit(A64_MOV(1, A64_FP, A64_SP), ctx);
/* Save callee-saved register */
emit(A64_PUSH(r6, r7, A64_SP), ctx);
@@ -161,12 +192,15 @@ static void build_prologue(struct jit_ctx *ctx)
if (ctx->tmp_used)
emit(A64_PUSH(tmp1, tmp2, A64_SP), ctx);
- /* Set up BPF stack */
- emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
+	/* Save fp (x25) and x26. SP requires 16-byte alignment */
+ emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx);
- /* Set up frame pointer */
+ /* Set up BPF prog stack base register (x25) */
emit(A64_MOV(1, fp, A64_SP), ctx);
+ /* Set up function call stack */
+ emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx);
+
/* Clear registers A and X */
emit_a64_mov_i64(ra, 0, ctx);
emit_a64_mov_i64(rx, 0, ctx);
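The STACK_SIZE constant introduced above folds the old per-function arithmetic into the preprocessor. Evaluated standalone, assuming MAX_BPF_STACK is 512 as in kernels of this vintage, it comes to 528 bytes:

#include <stdio.h>

#define MAX_BPF_STACK	512			/* classic eBPF stack */
#define STACK_ALIGN(sz)	(((sz) + 15) & ~15)	/* round up to 16B */
#define _STACK_SIZE	(MAX_BPF_STACK + 4)	/* + skb_copy_bits scratch */
#define STACK_SIZE	STACK_ALIGN(_STACK_SIZE)

int main(void)
{
	printf("%d\n", STACK_SIZE);	/* 528 */
	return 0;
}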
@@ -182,13 +216,12 @@ static void build_epilogue(struct jit_ctx *ctx)
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 tmp1 = bpf2a64[TMP_REG_1];
const u8 tmp2 = bpf2a64[TMP_REG_2];
- int stack_size = MAX_BPF_STACK;
-
- stack_size += 4; /* extra for skb_copy_bits buffer */
- stack_size = STACK_ALIGN(stack_size);
/* We're done with BPF stack */
- emit(A64_ADD_I(1, A64_SP, A64_SP, stack_size), ctx);
+ emit(A64_ADD_I(1, A64_SP, A64_SP, STACK_SIZE), ctx);
+
+	/* Restore fp (x25) and x26 */
+ emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
/* Restore callee-saved register */
if (ctx->tmp_used)
@@ -196,8 +229,8 @@ static void build_epilogue(struct jit_ctx *ctx)
emit(A64_POP(r8, r9, A64_SP), ctx);
emit(A64_POP(r6, r7, A64_SP), ctx);
- /* Restore frame pointer */
- emit(A64_MOV(1, fp, A64_SP), ctx);
+ /* Restore FP/LR registers */
+ emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
/* Set return value */
emit(A64_MOV(1, A64_R(0), r0), ctx);
@@ -557,7 +590,25 @@ emit_cond_jmp:
case BPF_ST | BPF_MEM | BPF_H:
case BPF_ST | BPF_MEM | BPF_B:
case BPF_ST | BPF_MEM | BPF_DW:
- goto notyet;
+		/* Load imm into a register, then store it */
+ ctx->tmp_used = 1;
+ emit_a64_mov_i(1, tmp2, off, ctx);
+ emit_a64_mov_i(1, tmp, imm, ctx);
+ switch (BPF_SIZE(code)) {
+ case BPF_W:
+ emit(A64_STR32(tmp, dst, tmp2), ctx);
+ break;
+ case BPF_H:
+ emit(A64_STRH(tmp, dst, tmp2), ctx);
+ break;
+ case BPF_B:
+ emit(A64_STRB(tmp, dst, tmp2), ctx);
+ break;
+ case BPF_DW:
+ emit(A64_STR64(tmp, dst, tmp2), ctx);
+ break;
+ }
+ break;
/* STX: *(size *)(dst + off) = src */
case BPF_STX | BPF_MEM | BPF_W:
@@ -624,7 +675,7 @@ emit_cond_jmp:
return -EINVAL;
}
emit_a64_mov_i64(r3, size, ctx);
- emit(A64_ADD_I(1, r4, fp, MAX_BPF_STACK), ctx);
+ emit(A64_SUB_I(1, r4, fp, STACK_SIZE), ctx);
emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
emit(A64_MOV(1, A64_FP, A64_SP), ctx);
@@ -758,7 +809,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, image_size, 2, ctx.image);
- bpf_flush_icache(ctx.image, ctx.image + ctx.idx);
+ bpf_flush_icache(header, ctx.image + ctx.idx);
set_memory_ro((unsigned long)header, header->pages);
prog->bpf_func = (void *)ctx.image;
diff --git a/arch/blackfin/include/asm/cmpxchg.h b/arch/blackfin/include/asm/cmpxchg.h
index c05868cc61c1..253928854299 100644
--- a/arch/blackfin/include/asm/cmpxchg.h
+++ b/arch/blackfin/include/asm/cmpxchg.h
@@ -128,6 +128,5 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
#endif /* !CONFIG_SMP */
#define xchg(ptr, x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
-#define tas(ptr) ((void)xchg((ptr), 1))
#endif /* __ARCH_BLACKFIN_CMPXCHG__ */
diff --git a/arch/blackfin/kernel/perf_event.c b/arch/blackfin/kernel/perf_event.c
index 1e9c8b0bf486..170d786807c4 100644
--- a/arch/blackfin/kernel/perf_event.c
+++ b/arch/blackfin/kernel/perf_event.c
@@ -14,7 +14,7 @@
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2009 Jaswinder Singh Rajput
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
* Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
*
* ppc:
diff --git a/arch/c6x/include/asm/cmpxchg.h b/arch/c6x/include/asm/cmpxchg.h
index b27c8cefb8c3..93d0a5a047a2 100644
--- a/arch/c6x/include/asm/cmpxchg.h
+++ b/arch/c6x/include/asm/cmpxchg.h
@@ -47,8 +47,6 @@ static inline unsigned int __xchg(unsigned int x, volatile void *ptr, int size)
#define xchg(ptr, x) \
((__typeof__(*(ptr)))__xchg((unsigned int)(x), (void *) (ptr), \
sizeof(*(ptr))))
-#define tas(ptr) xchg((ptr), 1)
-
#include <asm-generic/cmpxchg-local.h>
diff --git a/arch/frv/include/asm/cmpxchg.h b/arch/frv/include/asm/cmpxchg.h
index 5b04dd0aecab..a899765102ea 100644
--- a/arch/frv/include/asm/cmpxchg.h
+++ b/arch/frv/include/asm/cmpxchg.h
@@ -69,8 +69,6 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v);
#endif
-#define tas(ptr) (xchg((ptr), 1))
-
/*****************************************************************************/
/*
* compare and conditionally exchange value with memory
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index dd3ac75776ad..2e20333cbce9 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -17,6 +17,7 @@ config H8300
select HAVE_MEMBLOCK
select HAVE_DMA_ATTRS
select CLKSRC_OF
+ select H8300_TMR8
config RWSEM_GENERIC_SPINLOCK
def_bool y
diff --git a/arch/h8300/include/asm/io.h b/arch/h8300/include/asm/io.h
index bb837cded268..f0e14f3a800d 100644
--- a/arch/h8300/include/asm/io.h
+++ b/arch/h8300/include/asm/io.h
@@ -3,40 +3,45 @@
#ifdef __KERNEL__
-#include <asm-generic/io.h>
-
/* H8/300 internal I/O functions */
-static inline unsigned char ctrl_inb(unsigned long addr)
+
+#define __raw_readb __raw_readb
+static inline u8 __raw_readb(const volatile void __iomem *addr)
{
- return *(volatile unsigned char *)addr;
+ return *(volatile u8 *)addr;
}
-static inline unsigned short ctrl_inw(unsigned long addr)
+#define __raw_readw __raw_readw
+static inline u16 __raw_readw(const volatile void __iomem *addr)
{
- return *(volatile unsigned short *)addr;
+ return *(volatile u16 *)addr;
}
-static inline unsigned long ctrl_inl(unsigned long addr)
+#define __raw_readl __raw_readl
+static inline u32 __raw_readl(const volatile void __iomem *addr)
{
- return *(volatile unsigned long *)addr;
+ return *(volatile u32 *)addr;
}
-static inline void ctrl_outb(unsigned char b, unsigned long addr)
+#define __raw_writeb __raw_writeb
+static inline void __raw_writeb(u8 b, const volatile void __iomem *addr)
{
- *(volatile unsigned char *)addr = b;
+ *(volatile u8 *)addr = b;
}
-static inline void ctrl_outw(unsigned short b, unsigned long addr)
+#define __raw_writew __raw_writew
+static inline void __raw_writew(u16 b, const volatile void __iomem *addr)
{
- *(volatile unsigned short *)addr = b;
+ *(volatile u16 *)addr = b;
}
-static inline void ctrl_outl(unsigned long b, unsigned long addr)
+#define __raw_writel __raw_writel
+static inline void __raw_writel(u32 b, const volatile void __iomem *addr)
{
- *(volatile unsigned long *)addr = b;
+ *(volatile u32 *)addr = b;
}
-static inline void ctrl_bclr(int b, unsigned char *addr)
+static inline void ctrl_bclr(int b, void __iomem *addr)
{
if (__builtin_constant_p(b))
__asm__("bclr %1,%0" : "+WU"(*addr): "i"(b));
@@ -44,7 +49,7 @@ static inline void ctrl_bclr(int b, unsigned char *addr)
__asm__("bclr %w1,%0" : "+WU"(*addr): "r"(b));
}
-static inline void ctrl_bset(int b, unsigned char *addr)
+static inline void ctrl_bset(int b, void __iomem *addr)
{
if (__builtin_constant_p(b))
__asm__("bset %1,%0" : "+WU"(*addr): "i"(b));
@@ -52,6 +57,8 @@ static inline void ctrl_bset(int b, unsigned char *addr)
__asm__("bset %w1,%0" : "+WU"(*addr): "r"(b));
}
+#include <asm-generic/io.h>
+
#endif /* __KERNEL__ */
#endif /* _H8300_IO_H */
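The include reordering above works because asm-generic/io.h only supplies a default for each accessor that is not already #defined, so the arch overrides must be visible first. A standalone model of that guard pattern (the names are hypothetical):

#include <stdio.h>

#define my_read my_read				/* arch override marker */
static inline int my_read(void) { return 1; }	/* arch version */

#ifndef my_read					/* what the generic header does */
static inline int my_read(void) { return 0; }	/* generic default */
#endif

int main(void)
{
	printf("%d\n", my_read());	/* 1: the arch override wins */
	return 0;
}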
diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index c772abe6d19c..e4985dfa91dc 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -207,14 +207,14 @@ device_initcall(device_probe);
#define get_wait(base, addr) ({ \
int baddr; \
baddr = ((addr) / 0x200000 * 2); \
- w *= (ctrl_inw((unsigned long)(base) + 2) & (3 << baddr)) + 1; \
+ w *= (readw((base) + 2) & (3 << baddr)) + 1; \
})
#endif
#if defined(CONFIG_CPU_H8S)
#define get_wait(base, addr) ({ \
int baddr; \
baddr = ((addr) / 0x200000 * 16); \
- w *= (ctrl_inl((unsigned long)(base) + 2) & (7 << baddr)) + 1; \
+ w *= (readl((base) + 2) & (7 << baddr)) + 1; \
})
#endif
@@ -228,8 +228,8 @@ static __init int access_timing(void)
bsc = of_find_compatible_node(NULL, NULL, "renesas,h8300-bsc");
base = of_iomap(bsc, 0);
- w = (ctrl_inb((unsigned long)base + 0) & bit)?2:1;
- if (ctrl_inb((unsigned long)base + 1) & bit)
+ w = (readb(base + 0) & bit)?2:1;
+ if (readb(base + 1) & bit)
w *= get_wait(base, addr);
else
w *= 2;
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index df896a1c41d3..209c4b817c95 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -77,7 +77,7 @@ do { \
___p1; \
})
-#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
/*
* The group barrier in front of the rsm & ssm are necessary to ensure
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index db73390568c8..74c132d901bd 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -11,7 +11,7 @@
-#define NR_syscalls 322 /* length of syscall table */
+#define NR_syscalls 323 /* length of syscall table */
/*
* The following defines stop scripts/checksyscalls.sh from complaining about
diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h
index 9038726e7d26..762edce7572e 100644
--- a/arch/ia64/include/uapi/asm/unistd.h
+++ b/arch/ia64/include/uapi/asm/unistd.h
@@ -335,5 +335,6 @@
#define __NR_userfaultfd 1343
#define __NR_membarrier 1344
#define __NR_kcmp 1345
+#define __NR_mlock2 1346
#endif /* _UAPI_ASM_IA64_UNISTD_H */
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index dcd97f84d065..534a74acb849 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1771,5 +1771,6 @@ sys_call_table:
data8 sys_userfaultfd
data8 sys_membarrier
data8 sys_kcmp // 1345
+ data8 sys_mlock2
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
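With the table slot and __NR_mlock2 in place, the new call is reachable from userspace via syscall(2). A hedged smoke test (MLOCK_ONFAULT, value 1, is the flag that distinguishes mlock2 from plain mlock; the call may fail with EPERM or ENOMEM under restrictive RLIMIT_MEMLOCK, and __NR_mlock2 needs kernel headers that know the number):

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef MLOCK_ONFAULT
    #define MLOCK_ONFAULT 1        /* lock pages only as they fault in */
    #endif

    int main(void)
    {
        long page = sysconf(_SC_PAGESIZE);
        void *buf = malloc(page);

        if (!buf)
            return 1;
        if (syscall(__NR_mlock2, buf, (size_t)page, MLOCK_ONFAULT))
            perror("mlock2");
        free(buf);
        return 0;
    }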
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index fd104bd221ce..860e440611c9 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -3,6 +3,7 @@ generic-y += clkdev.h
generic-y += cputime.h
generic-y += exec.h
generic-y += irq_work.h
+generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += module.h
diff --git a/arch/m32r/include/asm/io.h b/arch/m32r/include/asm/io.h
index 61b8931bc192..4b0f5e001d4d 100644
--- a/arch/m32r/include/asm/io.h
+++ b/arch/m32r/include/asm/io.h
@@ -168,13 +168,21 @@ static inline void _writel(unsigned long l, unsigned long addr)
#define writew_relaxed writew
#define writel_relaxed writel
-#define ioread8 read
+#define ioread8 readb
#define ioread16 readw
#define ioread32 readl
#define iowrite8 writeb
#define iowrite16 writew
#define iowrite32 writel
+#define ioread8_rep(p, dst, count) insb((unsigned long)(p), (dst), (count))
+#define ioread16_rep(p, dst, count) insw((unsigned long)(p), (dst), (count))
+#define ioread32_rep(p, dst, count) insl((unsigned long)(p), (dst), (count))
+
+#define iowrite8_rep(p, src, count) outsb((unsigned long)(p), (src), (count))
+#define iowrite16_rep(p, src, count) outsw((unsigned long)(p), (src), (count))
+#define iowrite32_rep(p, src, count) outsl((unsigned long)(p), (src), (count))
+
#define ioread16be(addr) be16_to_cpu(readw(addr))
#define ioread32be(addr) be32_to_cpu(readl(addr))
#define iowrite16be(v, addr) writew(cpu_to_be16(v), (addr))
diff --git a/arch/m68k/coldfire/m54xx.c b/arch/m68k/coldfire/m54xx.c
index f7836c6a6b60..c32f76791f48 100644
--- a/arch/m68k/coldfire/m54xx.c
+++ b/arch/m68k/coldfire/m54xx.c
@@ -98,7 +98,7 @@ static void __init mcf54xx_bootmem_alloc(void)
memstart = PAGE_ALIGN(_ramstart);
min_low_pfn = PFN_DOWN(_rambase);
start_pfn = PFN_DOWN(memstart);
- max_low_pfn = PFN_DOWN(_ramend);
+ max_pfn = max_low_pfn = PFN_DOWN(_ramend);
high_memory = (void *)_ramend;
m68k_virt_to_node_shift = fls(_ramend - _rambase - 1) - 6;
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 0793a7f17417..f9d96bf86910 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -4,7 +4,7 @@
#include <uapi/asm/unistd.h>
-#define NR_syscalls 375
+#define NR_syscalls 376
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_OLD_STAT
diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h
index 5e6fae6c275f..36cf129de663 100644
--- a/arch/m68k/include/uapi/asm/unistd.h
+++ b/arch/m68k/include/uapi/asm/unistd.h
@@ -380,5 +380,6 @@
#define __NR_sendmmsg 372
#define __NR_userfaultfd 373
#define __NR_membarrier 374
+#define __NR_mlock2 375
#endif /* _UAPI_ASM_M68K_UNISTD_H_ */
diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c
index 88c27d94a721..76b9113f3092 100644
--- a/arch/m68k/kernel/setup_no.c
+++ b/arch/m68k/kernel/setup_no.c
@@ -238,11 +238,14 @@ void __init setup_arch(char **cmdline_p)
* Give all the memory to the bootmap allocator, tell it to put the
* boot mem_map at the start of memory.
*/
+ min_low_pfn = PFN_DOWN(memory_start);
+ max_pfn = max_low_pfn = PFN_DOWN(memory_end);
+
bootmap_size = init_bootmem_node(
NODE_DATA(0),
- memory_start >> PAGE_SHIFT, /* map goes here */
- PAGE_OFFSET >> PAGE_SHIFT, /* 0 on coldfire */
- memory_end >> PAGE_SHIFT);
+ min_low_pfn, /* map goes here */
+ PFN_DOWN(PAGE_OFFSET),
+ max_pfn);
/*
* Free the usable memory, we have to make sure we do not free
* the bootmem bitmap so we then reserve it after freeing it :-)
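The hunk above swaps the open-coded shifts for the PFN helpers and, as the diff shows, also records min_low_pfn and max_pfn, which this path previously left unset. For reference, the helpers are plain shifts, as defined in <linux/pfn.h>:

    /* From <linux/pfn.h> (for reference): page-frame-number helpers */
    #define PFN_ALIGN(x) (((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
    #define PFN_UP(x)    (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
    #define PFN_DOWN(x)  ((x) >> PAGE_SHIFT)
    #define PFN_PHYS(x)  ((phys_addr_t)(x) << PAGE_SHIFT)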
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 5dd0e80042f5..282cd903f4c4 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -395,3 +395,4 @@ ENTRY(sys_call_table)
.long sys_sendmmsg
.long sys_userfaultfd
.long sys_membarrier
+ .long sys_mlock2 /* 375 */
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index b958916e5eac..8f37fdd80be9 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -250,7 +250,7 @@ void __init paging_init(void)
high_memory = phys_to_virt(max_addr);
min_low_pfn = availmem >> PAGE_SHIFT;
- max_low_pfn = max_addr >> PAGE_SHIFT;
+ max_pfn = max_low_pfn = max_addr >> PAGE_SHIFT;
for (i = 0; i < m68k_num_memory; i++) {
addr = m68k_memory[i].addr;
diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c
index a8b942bf7163..2a5f43a68ae3 100644
--- a/arch/m68k/sun3/config.c
+++ b/arch/m68k/sun3/config.c
@@ -118,13 +118,13 @@ static void __init sun3_bootmem_alloc(unsigned long memory_start,
memory_end = memory_end & PAGE_MASK;
start_page = __pa(memory_start) >> PAGE_SHIFT;
- num_pages = __pa(memory_end) >> PAGE_SHIFT;
+ max_pfn = num_pages = __pa(memory_end) >> PAGE_SHIFT;
high_memory = (void *)memory_end;
availmem = memory_start;
m68k_setup_node(0);
- availmem += init_bootmem_node(NODE_DATA(0), start_page, 0, num_pages);
+ availmem += init_bootmem(start_page, num_pages);
availmem = (availmem + (PAGE_SIZE-1)) & PAGE_MASK;
free_bootmem(__pa(availmem), memory_end - (availmem));
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index c89da6312954..bf4dec229437 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -61,7 +61,8 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
/* FIXME this part of code is untested */
for_each_sg(sgl, sg, nents, i) {
sg->dma_address = sg_phys(sg);
- __dma_sync(sg_phys(sg), sg->length, direction);
+ __dma_sync(page_to_phys(sg_page(sg)) + sg->offset,
+ sg->length, direction);
}
return nents;
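For reference, sg_phys() in <linux/scatterlist.h> is defined as exactly the expression the hunk open-codes, so the computed address is unchanged; a hedged reading is that spelling it out avoids going through sg_phys()'s dma_addr_t return type on this platform:

    /* From <linux/scatterlist.h> (for reference): */
    static inline dma_addr_t sg_phys(struct scatterlist *sg)
    {
        return page_to_phys(sg_page(sg)) + sg->offset;
    }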
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index 1ba21204ebe0..8755d618e116 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -216,9 +216,9 @@ void __init plat_mem_setup(void)
AR71XX_RESET_SIZE);
ath79_pll_base = ioremap_nocache(AR71XX_PLL_BASE,
AR71XX_PLL_SIZE);
+ ath79_detect_sys_type();
ath79_ddr_ctrl_init();
- ath79_detect_sys_type();
if (mips_machtype != ATH79_MACH_GENERIC_OF)
detect_memory_region(0, ATH79_MEM_SIZE_MIN, ATH79_MEM_SIZE_MAX);
@@ -281,3 +281,8 @@ MIPS_MACHINE(ATH79_MACH_GENERIC,
"Generic",
"Generic AR71XX/AR724X/AR913X based board",
ath79_generic_init);
+
+MIPS_MACHINE(ATH79_MACH_GENERIC_OF,
+ "DTB",
+ "Generic AR71XX/AR724X/AR913X based board (DT)",
+ NULL);
diff --git a/arch/mips/boot/dts/qca/ar9132.dtsi b/arch/mips/boot/dts/qca/ar9132.dtsi
index fb7734eadbf0..13d0439496a9 100644
--- a/arch/mips/boot/dts/qca/ar9132.dtsi
+++ b/arch/mips/boot/dts/qca/ar9132.dtsi
@@ -107,7 +107,7 @@
miscintc: interrupt-controller@18060010 {
compatible = "qca,ar9132-misc-intc",
"qca,ar7100-misc-intc";
- reg = <0x18060010 0x4>;
+ reg = <0x18060010 0x8>;
interrupt-parent = <&cpuintc>;
interrupts = <6>;
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index ad1fccdb8d13..2046c0230224 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -200,8 +200,9 @@ static inline int pfn_valid(unsigned long pfn)
{
/* avoid <linux/mm.h> include hell */
extern unsigned long max_mapnr;
+ unsigned long pfn_offset = ARCH_PFN_OFFSET;
- return pfn >= ARCH_PFN_OFFSET && pfn < max_mapnr;
+ return pfn >= pfn_offset && pfn < max_mapnr;
}
#elif defined(CONFIG_SPARSEMEM)
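The local variable looks redundant, but on configurations where ARCH_PFN_OFFSET expands to 0 the direct comparison is always true for an unsigned pfn, and GCC flags that under -Wextra/-Wtype-limits; routing the constant through a variable keeps the check while silencing the warning. A compilable sketch of the difference (the zero offset is the assumed case):

    #define ARCH_PFN_OFFSET 0UL    /* assumption: offset is zero here */

    int pfn_valid_old(unsigned long pfn, unsigned long max_mapnr)
    {
        return pfn >= ARCH_PFN_OFFSET && pfn < max_mapnr; /* -Wtype-limits */
    }

    int pfn_valid_new(unsigned long pfn, unsigned long max_mapnr)
    {
        unsigned long pfn_offset = ARCH_PFN_OFFSET;       /* no warning */
        return pfn >= pfn_offset && pfn < max_mapnr;
    }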
diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
index 5305d694ffe5..095ecafe6bd3 100644
--- a/arch/mips/include/asm/uaccess.h
+++ b/arch/mips/include/asm/uaccess.h
@@ -599,7 +599,7 @@ extern void __put_user_unknown(void);
* On error, the variable @x is set to zero.
*/
#define __get_user_unaligned(x,ptr) \
- __get_user__unalignednocheck((x),(ptr),sizeof(*(ptr)))
+ __get_user_unaligned_nocheck((x),(ptr),sizeof(*(ptr)))
/*
* Yuck. We need two variants, one for 64bit operation and one
@@ -620,8 +620,8 @@ extern void __get_user_unaligned_unknown(void);
do { \
switch (size) { \
case 1: __get_data_asm(val, "lb", ptr); break; \
- case 2: __get_user_unaligned_asm(val, "ulh", ptr); break; \
- case 4: __get_user_unaligned_asm(val, "ulw", ptr); break; \
+ case 2: __get_data_unaligned_asm(val, "ulh", ptr); break; \
+ case 4: __get_data_unaligned_asm(val, "ulw", ptr); break; \
case 8: __GET_USER_UNALIGNED_DW(val, ptr); break; \
default: __get_user_unaligned_unknown(); break; \
} \
@@ -1122,9 +1122,15 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n);
__cu_to = (to); \
__cu_from = (from); \
__cu_len = (n); \
- might_fault(); \
- __cu_len = __invoke_copy_from_user(__cu_to, __cu_from, \
- __cu_len); \
+ if (eva_kernel_access()) { \
+ __cu_len = __invoke_copy_from_kernel(__cu_to, \
+ __cu_from, \
+ __cu_len); \
+ } else { \
+ might_fault(); \
+ __cu_len = __invoke_copy_from_user(__cu_to, __cu_from, \
+ __cu_len); \
+ } \
__cu_len; \
})
@@ -1229,16 +1235,28 @@ __clear_user(void __user *addr, __kernel_size_t size)
{
__kernel_size_t res;
- might_fault();
- __asm__ __volatile__(
- "move\t$4, %1\n\t"
- "move\t$5, $0\n\t"
- "move\t$6, %2\n\t"
- __MODULE_JAL(__bzero)
- "move\t%0, $6"
- : "=r" (res)
- : "r" (addr), "r" (size)
- : "$4", "$5", "$6", __UA_t0, __UA_t1, "$31");
+ if (eva_kernel_access()) {
+ __asm__ __volatile__(
+ "move\t$4, %1\n\t"
+ "move\t$5, $0\n\t"
+ "move\t$6, %2\n\t"
+ __MODULE_JAL(__bzero_kernel)
+ "move\t%0, $6"
+ : "=r" (res)
+ : "r" (addr), "r" (size)
+ : "$4", "$5", "$6", __UA_t0, __UA_t1, "$31");
+ } else {
+ might_fault();
+ __asm__ __volatile__(
+ "move\t$4, %1\n\t"
+ "move\t$5, $0\n\t"
+ "move\t$6, %2\n\t"
+ __MODULE_JAL(__bzero)
+ "move\t%0, $6"
+ : "=r" (res)
+ : "r" (addr), "r" (size)
+ : "$4", "$5", "$6", __UA_t0, __UA_t1, "$31");
+ }
return res;
}
@@ -1384,7 +1402,7 @@ static inline long strlen_user(const char __user *s)
might_fault();
__asm__ __volatile__(
"move\t$4, %1\n\t"
- __MODULE_JAL(__strlen_kernel_asm)
+ __MODULE_JAL(__strlen_user_asm)
"move\t%0, $2"
: "=r" (res)
: "r" (s)
diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
index 8fd5a276cad2..ac81edd44563 100644
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S
@@ -257,7 +257,6 @@ LEAF(mips_cps_core_init)
has_mt t0, 3f
.set push
- .set mips64r2
.set mt
/* Only allow 1 TC per VPE to execute... */
@@ -376,7 +375,6 @@ LEAF(mips_cps_boot_vpes)
nop
.set push
- .set mips64r2
.set mt
1: /* Enter VPE configuration state */
diff --git a/arch/mips/kernel/mips_ksyms.c b/arch/mips/kernel/mips_ksyms.c
index 291af0b5c482..e2b6ab74643d 100644
--- a/arch/mips/kernel/mips_ksyms.c
+++ b/arch/mips/kernel/mips_ksyms.c
@@ -17,6 +17,7 @@
#include <asm/fpu.h>
#include <asm/msa.h>
+extern void *__bzero_kernel(void *__s, size_t __count);
extern void *__bzero(void *__s, size_t __count);
extern long __strncpy_from_kernel_nocheck_asm(char *__to,
const char *__from, long __len);
@@ -64,6 +65,7 @@ EXPORT_SYMBOL(__copy_from_user_eva);
EXPORT_SYMBOL(__copy_in_user_eva);
EXPORT_SYMBOL(__copy_to_user_eva);
EXPORT_SYMBOL(__copy_user_inatomic_eva);
+EXPORT_SYMBOL(__bzero_kernel);
#endif
EXPORT_SYMBOL(__bzero);
EXPORT_SYMBOL(__strncpy_from_kernel_nocheck_asm);
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index d5fa3eaf39a1..41b1b090f56f 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -1581,7 +1581,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
base = (inst >> 21) & 0x1f;
op_inst = (inst >> 16) & 0x1f;
- offset = inst & 0xffff;
+ offset = (int16_t)inst;
cache = (inst >> 16) & 0x3;
op = (inst >> 18) & 0x7;
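The one-line change above is a sign-extension fix: the cache-op offset is a signed 16-bit immediate, and `inst & 0xffff` zero-extends it, turning a negative offset into a large positive one, while the cast truncates to 16 bits and then sign-extends. A small demonstration (the instruction word is hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t inst = 0xbc90fff0;           /* hypothetical word with
                                                 immediate -16 */
        int32_t masked = inst & 0xffff;       /* 65520: sign lost */
        int32_t extended = (int16_t)inst;     /* -16: sign kept */

        printf("masked=%d extended=%d\n", masked, extended);
        return 0;
    }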
diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S
index 7bab3a4e8f7d..7e2210846b8b 100644
--- a/arch/mips/kvm/locore.S
+++ b/arch/mips/kvm/locore.S
@@ -157,9 +157,11 @@ FEXPORT(__kvm_mips_vcpu_run)
FEXPORT(__kvm_mips_load_asid)
/* Set the ASID for the Guest Kernel */
- INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */
- /* addresses shift to 0x80000000 */
- bltz t0, 1f /* If kernel */
+ PTR_L t0, VCPU_COP0(k1)
+ LONG_L t0, COP0_STATUS(t0)
+ andi t0, KSU_USER | ST0_ERL | ST0_EXL
+ xori t0, KSU_USER
+ bnez t0, 1f /* If kernel */
INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */
INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */
1:
@@ -474,9 +476,11 @@ __kvm_mips_return_to_guest:
mtc0 t0, CP0_EPC
/* Set the ASID for the Guest Kernel */
- INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */
- /* addresses shift to 0x80000000 */
- bltz t0, 1f /* If kernel */
+ PTR_L t0, VCPU_COP0(k1)
+ LONG_L t0, COP0_STATUS(t0)
+ andi t0, KSU_USER | ST0_ERL | ST0_EXL
+ xori t0, KSU_USER
+ bnez t0, 1f /* If kernel */
INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */
INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */
1:
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 49ff3bfc007e..b9b803facdbf 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -279,7 +279,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
if (!gebase) {
err = -ENOMEM;
- goto out_free_cpu;
+ goto out_uninit_cpu;
}
kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n",
ALIGN(size, PAGE_SIZE), gebase);
@@ -343,6 +343,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
out_free_gebase:
kfree(gebase);
+out_uninit_cpu:
+ kvm_vcpu_uninit(vcpu);
+
out_free_cpu:
kfree(vcpu);
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index b8e63fd00375..8f0019a2e5c8 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -283,6 +283,8 @@ LEAF(memset)
1:
#ifndef CONFIG_EVA
FEXPORT(__bzero)
+#else
+FEXPORT(__bzero_kernel)
#endif
__BUILD_BZERO LEGACY_MODE
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index d8117be729a2..730d394ce5f0 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -145,7 +145,7 @@ static void *mips_dma_alloc_coherent(struct device *dev, size_t size,
gfp = massage_gfp_flags(dev, gfp);
- if (IS_ENABLED(CONFIG_DMA_CMA) && !(gfp & GFP_ATOMIC))
+ if (IS_ENABLED(CONFIG_DMA_CMA) && gfpflags_allow_blocking(gfp))
page = dma_alloc_from_contiguous(dev,
count, get_order(size));
if (!page)
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 77cb27309db2..1a8c96035716 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -521,19 +521,6 @@ static inline u16 align_sp(unsigned int num)
return num;
}
-static bool is_load_to_a(u16 inst)
-{
- switch (inst) {
- case BPF_LD | BPF_W | BPF_LEN:
- case BPF_LD | BPF_W | BPF_ABS:
- case BPF_LD | BPF_H | BPF_ABS:
- case BPF_LD | BPF_B | BPF_ABS:
- return true;
- default:
- return false;
- }
-}
-
static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
{
int i = 0, real_off = 0;
@@ -614,7 +601,6 @@ static unsigned int get_stack_depth(struct jit_ctx *ctx)
static void build_prologue(struct jit_ctx *ctx)
{
- u16 first_inst = ctx->skf->insns[0].code;
int sp_off;
/* Calculate the total offset for the stack pointer */
@@ -641,7 +627,7 @@ static void build_prologue(struct jit_ctx *ctx)
emit_jit_reg_move(r_X, r_zero, ctx);
/* Do not leak kernel data to userspace */
- if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
+ if (bpf_needs_clear_a(&ctx->skf->insns[0]))
emit_jit_reg_move(r_A, r_zero, ctx);
}
diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c
index 8a978022630b..a245cad4372a 100644
--- a/arch/mips/pci/pci-rt2880.c
+++ b/arch/mips/pci/pci-rt2880.c
@@ -11,6 +11,7 @@
* by the Free Software Foundation.
*/
+#include <linux/delay.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/io.h>
@@ -220,7 +221,6 @@ int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
static int rt288x_pci_probe(struct platform_device *pdev)
{
void __iomem *io_map_base;
- int i;
rt2880_pci_base = ioremap_nocache(RT2880_PCI_BASE, PAGE_SIZE);
@@ -232,8 +232,7 @@ static int rt288x_pci_probe(struct platform_device *pdev)
ioport_resource.end = RT2880_PCI_IO_BASE + RT2880_PCI_IO_SIZE - 1;
rt2880_pci_reg_write(0, RT2880_PCI_REG_PCICFG_ADDR);
- for (i = 0; i < 0xfffff; i++)
- ;
+ udelay(1);
rt2880_pci_reg_write(0x79, RT2880_PCI_REG_ARBCTL);
rt2880_pci_reg_write(0x07FF0001, RT2880_PCI_REG_BAR0SETUP_ADDR);
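The deleted delay loop is the anti-pattern these hunks keep removing (see also msp_setup.c and sni/reset.c below): an empty counted loop has no side effects, so the compiler may delete it outright, and even when it survives its wall-clock length scales with CPU frequency, whereas udelay()/mdelay() are calibrated. A sketch of why the loop is untrustworthy:

    volatile int sink;

    void delay_broken(void)
    {
        for (int i = 0; i < 0xfffff; i++)
            ;                  /* no side effects: -O2 may remove it */
    }

    void delay_fragile(void)
    {
        for (int i = 0; i < 0xfffff; i++)
            sink = i;          /* survives, but duration is clock-bound */
    }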
diff --git a/arch/mips/pmcs-msp71xx/msp_setup.c b/arch/mips/pmcs-msp71xx/msp_setup.c
index 4f925e06c414..9d293b3e9130 100644
--- a/arch/mips/pmcs-msp71xx/msp_setup.c
+++ b/arch/mips/pmcs-msp71xx/msp_setup.c
@@ -10,6 +10,8 @@
* option) any later version.
*/
+#include <linux/delay.h>
+
#include <asm/bootinfo.h>
#include <asm/cacheflush.h>
#include <asm/idle.h>
@@ -37,7 +39,6 @@ extern void msp_serial_setup(void);
void msp7120_reset(void)
{
void *start, *end, *iptr;
- register int i;
	/* Disable all interrupts */

local_irq_disable();
@@ -77,7 +78,7 @@ void msp7120_reset(void)
*/
/* Wait a bit for the DDRC to settle */
- for (i = 0; i < 100000000; i++);
+ mdelay(125);
#if defined(CONFIG_PMC_MSP7120_GW)
/*
diff --git a/arch/mips/sni/reset.c b/arch/mips/sni/reset.c
index 244f9427625b..6afa34346b81 100644
--- a/arch/mips/sni/reset.c
+++ b/arch/mips/sni/reset.c
@@ -3,6 +3,8 @@
*
* Reset a SNI machine.
*/
+#include <linux/delay.h>
+
#include <asm/io.h>
#include <asm/reboot.h>
#include <asm/sni.h>
@@ -24,7 +26,7 @@ static inline void kb_wait(void)
/* XXX This ends up at the ARC firmware prompt ... */
void sni_machine_restart(char *command)
{
- int i, j;
+ int i;
	/* This does a normal reset via the keyboard controller like a PC.
	   We could do it more easily ... */
@@ -32,9 +34,9 @@ void sni_machine_restart(char *command)
for (;;) {
for (i = 0; i < 100; i++) {
kb_wait();
- for (j = 0; j < 100000 ; j++)
- /* nothing */;
+ udelay(50);
outb_p(0xfe, 0x64); /* pulse reset low */
+ udelay(50);
}
}
}
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index ef5f348f386a..14568900fc1d 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -26,8 +26,8 @@ aflags-vdso := $(ccflags-vdso) \
# the comments on that file.
#
ifndef CONFIG_CPU_MIPSR6
- ifeq ($(call ld-ifversion, -gt, 22400000, y),)
- $(warning MIPS VDSO requires binutils > 2.24)
+ ifeq ($(call ld-ifversion, -lt, 22500000, y),y)
+ $(warning MIPS VDSO requires binutils >= 2.25)
obj-vdso-y := $(filter-out gettimeofday.o, $(obj-vdso-y))
ccflags-vdso += -DDISABLE_MIPS_VDSO
endif
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 4434b54e1d87..78ae5552fdb8 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -1,6 +1,7 @@
config MN10300
def_bool y
select HAVE_OPROFILE
+ select HAVE_UID16
select GENERIC_IRQ_SHOW
select ARCH_WANT_IPC_PARSE_VERSION
select HAVE_ARCH_TRACEHOOK
@@ -37,9 +38,6 @@ config HIGHMEM
config NUMA
def_bool n
-config UID16
- def_bool y
-
config RWSEM_GENERIC_SPINLOCK
def_bool y
diff --git a/arch/nios2/mm/cacheflush.c b/arch/nios2/mm/cacheflush.c
index 223cdcc8203f..87bf88ed04c6 100644
--- a/arch/nios2/mm/cacheflush.c
+++ b/arch/nios2/mm/cacheflush.c
@@ -23,22 +23,6 @@ static void __flush_dcache(unsigned long start, unsigned long end)
end += (cpuinfo.dcache_line_size - 1);
end &= ~(cpuinfo.dcache_line_size - 1);
- for (addr = start; addr < end; addr += cpuinfo.dcache_line_size) {
- __asm__ __volatile__ (" flushda 0(%0)\n"
- : /* Outputs */
- : /* Inputs */ "r"(addr)
- /* : No clobber */);
- }
-}
-
-static void __flush_dcache_all(unsigned long start, unsigned long end)
-{
- unsigned long addr;
-
- start &= ~(cpuinfo.dcache_line_size - 1);
- end += (cpuinfo.dcache_line_size - 1);
- end &= ~(cpuinfo.dcache_line_size - 1);
-
if (end > start + cpuinfo.dcache_size)
end = start + cpuinfo.dcache_size;
@@ -112,7 +96,7 @@ static void flush_aliases(struct address_space *mapping, struct page *page)
void flush_cache_all(void)
{
- __flush_dcache_all(0, cpuinfo.dcache_size);
+ __flush_dcache(0, cpuinfo.dcache_size);
__flush_icache(0, cpuinfo.icache_size);
}
@@ -182,7 +166,7 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
*/
unsigned long start = (unsigned long)page_address(page);
- __flush_dcache_all(start, start + PAGE_SIZE);
+ __flush_dcache(start, start + PAGE_SIZE);
}
void flush_dcache_page(struct page *page)
@@ -268,7 +252,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
{
flush_cache_page(vma, user_vaddr, page_to_pfn(page));
memcpy(dst, src, len);
- __flush_dcache_all((unsigned long)src, (unsigned long)src + len);
+ __flush_dcache((unsigned long)src, (unsigned long)src + len);
if (vma->vm_flags & VM_EXEC)
__flush_icache((unsigned long)src, (unsigned long)src + len);
}
@@ -279,7 +263,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
{
flush_cache_page(vma, user_vaddr, page_to_pfn(page));
memcpy(dst, src, len);
- __flush_dcache_all((unsigned long)dst, (unsigned long)dst + len);
+ __flush_dcache((unsigned long)dst, (unsigned long)dst + len);
if (vma->vm_flags & VM_EXEC)
__flush_icache((unsigned long)dst, (unsigned long)dst + len);
}
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index c36546959e86..729f89163bc3 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -108,6 +108,9 @@ config PGTABLE_LEVELS
default 3 if 64BIT && PARISC_PAGE_SIZE_4KB
default 2
+config SYS_SUPPORTS_HUGETLBFS
+ def_bool y if PA20
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h
new file mode 100644
index 000000000000..7d56a9ccb752
--- /dev/null
+++ b/arch/parisc/include/asm/hugetlb.h
@@ -0,0 +1,85 @@
+#ifndef _ASM_PARISC64_HUGETLB_H
+#define _ASM_PARISC64_HUGETLB_H
+
+#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
+
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte);
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep);
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+ unsigned long addr,
+ unsigned long len) {
+ return 0;
+}
+
+/*
+ * If the arch doesn't supply something else, assume that hugepage
+ * size aligned regions are ok without further preparation.
+ */
+static inline int prepare_hugepage_range(struct file *file,
+ unsigned long addr, unsigned long len)
+{
+ if (len & ~HPAGE_MASK)
+ return -EINVAL;
+ if (addr & ~HPAGE_MASK)
+ return -EINVAL;
+ return 0;
+}
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+ unsigned long addr, unsigned long end,
+ unsigned long floor,
+ unsigned long ceiling)
+{
+ free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+ return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+ return pte_wrprotect(pte);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_t old_pte = *ptep;
+ set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty)
+{
+ int changed = !pte_same(*ptep, pte);
+ if (changed) {
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+ flush_tlb_page(vma, addr);
+ }
+ return changed;
+}
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+ return *ptep;
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
+#endif /* _ASM_PARISC64_HUGETLB_H */
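Once an architecture supplies this header plus the Kconfig and mm pieces below, hugetlbfs-backed mappings become available to userspace. A hedged smoke test (requires huge pages reserved via /proc/sys/vm/nr_hugepages; the 4 MB size matches one of the parisc configurations in the page.h hunk below):

    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    int main(void)
    {
        size_t len = 4UL << 20;    /* one 4 MB huge page (assumption) */
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

        if (p == MAP_FAILED) {
            perror("mmap(MAP_HUGETLB)");
            return 1;
        }
        memset(p, 0, len);         /* fault the huge page in */
        munmap(p, len);
        return 0;
    }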
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 60d5d174dfe4..80e742a1c162 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -145,11 +145,22 @@ extern int npmem_ranges;
#endif /* CONFIG_DISCONTIGMEM */
#ifdef CONFIG_HUGETLB_PAGE
-#define HPAGE_SHIFT 22 /* 4MB (is this fixed?) */
+#define HPAGE_SHIFT PMD_SHIFT /* fixed for transparent huge pages */
#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
+
+#if defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB)
+# define REAL_HPAGE_SHIFT 20 /* 20 = 1MB */
+# define _HUGE_PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_1M
+#elif !defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB)
+# define REAL_HPAGE_SHIFT 22 /* 22 = 4MB */
+# define _HUGE_PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_4M
+#else
+# define REAL_HPAGE_SHIFT 24 /* 24 = 16MB */
+# define _HUGE_PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_16M
#endif
+#endif /* CONFIG_HUGETLB_PAGE */
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index 3edbb9fc91b4..f2fd327dce2e 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -35,7 +35,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
PxD_FLAG_VALID |
PxD_FLAG_ATTACHED)
+ (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT));
- /* The first pmd entry also is marked with _PAGE_GATEWAY as
+ /* The first pmd entry also is marked with PxD_FLAG_ATTACHED as
* a signal that this pmd may not be freed */
__pgd_val_set(*pgd, PxD_FLAG_ATTACHED);
#endif
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index f93c4a4e6580..291cee28ccb6 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -83,7 +83,11 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e))
/* This is the size of the initially mapped kernel memory */
-#define KERNEL_INITIAL_ORDER 24 /* 0 to 1<<24 = 16MB */
+#ifdef CONFIG_64BIT
+#define KERNEL_INITIAL_ORDER 25 /* 1<<25 = 32MB */
+#else
+#define KERNEL_INITIAL_ORDER 24 /* 1<<24 = 16MB */
+#endif
#define KERNEL_INITIAL_SIZE (1 << KERNEL_INITIAL_ORDER)
#if CONFIG_PGTABLE_LEVELS == 3
@@ -167,7 +171,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
#define _PAGE_NO_CACHE_BIT 24 /* (0x080) Uncached Page (U bit) */
#define _PAGE_ACCESSED_BIT 23 /* (0x100) Software: Page Accessed */
#define _PAGE_PRESENT_BIT 22 /* (0x200) Software: translation valid */
-/* bit 21 was formerly the FLUSH bit but is now unused */
+#define _PAGE_HPAGE_BIT 21 /* (0x400) Software: Huge Page */
#define _PAGE_USER_BIT 20 /* (0x800) Software: User accessible page */
/* N.B. The bits are defined in terms of a 32 bit word above, so the */
@@ -194,6 +198,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
#define _PAGE_NO_CACHE (1 << xlate_pabit(_PAGE_NO_CACHE_BIT))
#define _PAGE_ACCESSED (1 << xlate_pabit(_PAGE_ACCESSED_BIT))
#define _PAGE_PRESENT (1 << xlate_pabit(_PAGE_PRESENT_BIT))
+#define _PAGE_HUGE (1 << xlate_pabit(_PAGE_HPAGE_BIT))
#define _PAGE_USER (1 << xlate_pabit(_PAGE_USER_BIT))
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)
@@ -217,7 +222,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
#define PxD_FLAG_VALID (1 << xlate_pabit(_PxD_VALID_BIT))
#define PxD_FLAG_MASK (0xf)
#define PxD_FLAG_SHIFT (4)
-#define PxD_VALUE_SHIFT (8) /* (PAGE_SHIFT-PxD_FLAG_SHIFT) */
+#define PxD_VALUE_SHIFT (PFN_PTE_SHIFT-PxD_FLAG_SHIFT)
#ifndef __ASSEMBLY__
@@ -363,6 +368,19 @@ static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return
static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
/*
+ * Huge pte definitions.
+ */
+#ifdef CONFIG_HUGETLB_PAGE
+#define pte_huge(pte) (pte_val(pte) & _PAGE_HUGE)
+#define pte_mkhuge(pte) (__pte(pte_val(pte) | \
+ (parisc_requires_coherency() ? 0 : _PAGE_HUGE)))
+#else
+#define pte_huge(pte) (0)
+#define pte_mkhuge(pte) (pte)
+#endif
+
+
+/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
*/
@@ -410,8 +428,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
/* Find an entry in the second-level page table.. */
#if CONFIG_PGTABLE_LEVELS == 3
+#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
#define pmd_offset(dir,address) \
-((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1)))
+((pmd_t *) pgd_page_vaddr(*(dir)) + pmd_index(address))
#else
#define pmd_offset(dir,addr) ((pmd_t *) dir)
#endif
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index 54adb60c0a42..7e759ecb1343 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -192,33 +192,6 @@ void show_trace(struct task_struct *task, unsigned long *stack);
*/
typedef unsigned int elf_caddr_t;
-#define start_thread_som(regs, new_pc, new_sp) do { \
- unsigned long *sp = (unsigned long *)new_sp; \
- __u32 spaceid = (__u32)current->mm->context; \
- unsigned long pc = (unsigned long)new_pc; \
- /* offset pc for priv. level */ \
- pc |= 3; \
- \
- regs->iasq[0] = spaceid; \
- regs->iasq[1] = spaceid; \
- regs->iaoq[0] = pc; \
- regs->iaoq[1] = pc + 4; \
- regs->sr[2] = LINUX_GATEWAY_SPACE; \
- regs->sr[3] = 0xffff; \
- regs->sr[4] = spaceid; \
- regs->sr[5] = spaceid; \
- regs->sr[6] = spaceid; \
- regs->sr[7] = spaceid; \
- regs->gr[ 0] = USER_PSW; \
- regs->gr[30] = ((new_sp)+63)&~63; \
- regs->gr[31] = pc; \
- \
- get_user(regs->gr[26],&sp[0]); \
- get_user(regs->gr[25],&sp[-1]); \
- get_user(regs->gr[24],&sp[-2]); \
- get_user(regs->gr[23],&sp[-3]); \
-} while(0)
-
/* The ELF abi wants things done a "wee bit" differently than
* som does. Supporting this behavior here avoids
* having our own version of create_elf_tables.
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index ecc3ae1ca28e..dd4d1876a020 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -49,16 +49,6 @@
#define MADV_DONTFORK 10 /* don't inherit across fork */
#define MADV_DOFORK 11 /* do inherit across fork */
-/* The range 12-64 is reserved for page size specification. */
-#define MADV_4K_PAGES 12 /* Use 4K pages */
-#define MADV_16K_PAGES 14 /* Use 16K pages */
-#define MADV_64K_PAGES 16 /* Use 64K pages */
-#define MADV_256K_PAGES 18 /* Use 256K pages */
-#define MADV_1M_PAGES 20 /* Use 1 Megabyte pages */
-#define MADV_4M_PAGES 22 /* Use 4 Megabyte pages */
-#define MADV_16M_PAGES 24 /* Use 16 Megabyte pages */
-#define MADV_64M_PAGES 26 /* Use 64 Megabyte pages */
-
#define MADV_MERGEABLE 65 /* KSM may merge identical pages */
#define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */
diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h
index 33170384d3ac..35bdccbb2036 100644
--- a/arch/parisc/include/uapi/asm/unistd.h
+++ b/arch/parisc/include/uapi/asm/unistd.h
@@ -360,8 +360,9 @@
#define __NR_execveat (__NR_Linux + 342)
#define __NR_membarrier (__NR_Linux + 343)
#define __NR_userfaultfd (__NR_Linux + 344)
+#define __NR_mlock2 (__NR_Linux + 345)
-#define __NR_Linux_syscalls (__NR_userfaultfd + 1)
+#define __NR_Linux_syscalls (__NR_mlock2 + 1)
#define __IGNORE_select /* newselect */
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 59001cea13f9..d2f62570a7b1 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -290,6 +290,14 @@ int main(void)
DEFINE(ASM_PFN_PTE_SHIFT, PFN_PTE_SHIFT);
DEFINE(ASM_PT_INITIAL, PT_INITIAL);
BLANK();
+ /* HUGEPAGE_SIZE is only used in vmlinux.lds.S to align kernel text
+ * and kernel data on physical huge pages */
+#ifdef CONFIG_HUGETLB_PAGE
+ DEFINE(HUGEPAGE_SIZE, 1UL << REAL_HPAGE_SHIFT);
+#else
+ DEFINE(HUGEPAGE_SIZE, PAGE_SIZE);
+#endif
+ BLANK();
DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip));
DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space));
DEFINE(EXCDATA_ADDR, offsetof(struct exception_data, fault_addr));
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index c5ef4081b01d..623496c11756 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -502,21 +502,38 @@
STREG \pte,0(\ptp)
.endm
+ /* We have (depending on the page size):
+ * - 38 to 52-bit Physical Page Number
+ * - 12 to 26-bit page offset
+ */
/* bitshift difference between a PFN (based on kernel's PAGE_SIZE)
* to a CPU TLB 4k PFN (4k => 12 bits to shift) */
- #define PAGE_ADD_SHIFT (PAGE_SHIFT-12)
+ #define PAGE_ADD_SHIFT (PAGE_SHIFT-12)
+ #define PAGE_ADD_HUGE_SHIFT (REAL_HPAGE_SHIFT-12)
/* Drop prot bits and convert to page addr for iitlbt and idtlbt */
- .macro convert_for_tlb_insert20 pte
+ .macro convert_for_tlb_insert20 pte,tmp
+#ifdef CONFIG_HUGETLB_PAGE
+ copy \pte,\tmp
+ extrd,u \tmp,(63-ASM_PFN_PTE_SHIFT)+(63-58)+PAGE_ADD_SHIFT,\
+ 64-PAGE_SHIFT-PAGE_ADD_SHIFT,\pte
+
+ depdi _PAGE_SIZE_ENCODING_DEFAULT,63,\
+ (63-58)+PAGE_ADD_SHIFT,\pte
+ extrd,u,*= \tmp,_PAGE_HPAGE_BIT+32,1,%r0
+ depdi _HUGE_PAGE_SIZE_ENCODING_DEFAULT,63,\
+ (63-58)+PAGE_ADD_HUGE_SHIFT,\pte
+#else /* Huge pages disabled */
extrd,u \pte,(63-ASM_PFN_PTE_SHIFT)+(63-58)+PAGE_ADD_SHIFT,\
64-PAGE_SHIFT-PAGE_ADD_SHIFT,\pte
depdi _PAGE_SIZE_ENCODING_DEFAULT,63,\
(63-58)+PAGE_ADD_SHIFT,\pte
+#endif
.endm
/* Convert the pte and prot to tlb insertion values. How
* this happens is quite subtle, read below */
- .macro make_insert_tlb spc,pte,prot
+ .macro make_insert_tlb spc,pte,prot,tmp
space_to_prot \spc \prot /* create prot id from space */
/* The following is the real subtlety. This is depositing
* T <-> _PAGE_REFTRAP
@@ -553,7 +570,7 @@
depdi 1,12,1,\prot
/* Drop prot bits and convert to page addr for iitlbt and idtlbt */
- convert_for_tlb_insert20 \pte
+ convert_for_tlb_insert20 \pte \tmp
.endm
/* Identical macro to make_insert_tlb above, except it
@@ -646,17 +663,12 @@
/*
- * Align fault_vector_20 on 4K boundary so that both
- * fault_vector_11 and fault_vector_20 are on the
- * same page. This is only necessary as long as we
- * write protect the kernel text, which we may stop
- * doing once we use large page translations to cover
- * the static part of the kernel address space.
+ * Fault_vectors are architecturally required to be aligned on a 2K
+ * boundary
*/
.text
-
- .align 4096
+ .align 2048
ENTRY(fault_vector_20)
/* First vector is invalid (0) */
@@ -1147,7 +1159,7 @@ dtlb_miss_20w:
tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20w
update_accessed ptp,pte,t0,t1
- make_insert_tlb spc,pte,prot
+ make_insert_tlb spc,pte,prot,t1
idtlbt pte,prot
@@ -1173,7 +1185,7 @@ nadtlb_miss_20w:
tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20w
update_accessed ptp,pte,t0,t1
- make_insert_tlb spc,pte,prot
+ make_insert_tlb spc,pte,prot,t1
idtlbt pte,prot
@@ -1267,7 +1279,7 @@ dtlb_miss_20:
tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20
update_accessed ptp,pte,t0,t1
- make_insert_tlb spc,pte,prot
+ make_insert_tlb spc,pte,prot,t1
f_extend pte,t1
@@ -1295,7 +1307,7 @@ nadtlb_miss_20:
tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20
update_accessed ptp,pte,t0,t1
- make_insert_tlb spc,pte,prot
+ make_insert_tlb spc,pte,prot,t1
f_extend pte,t1
@@ -1404,7 +1416,7 @@ itlb_miss_20w:
tlb_lock spc,ptp,pte,t0,t1,itlb_fault
update_accessed ptp,pte,t0,t1
- make_insert_tlb spc,pte,prot
+ make_insert_tlb spc,pte,prot,t1
iitlbt pte,prot
@@ -1428,7 +1440,7 @@ naitlb_miss_20w:
tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20w
update_accessed ptp,pte,t0,t1
- make_insert_tlb spc,pte,prot
+ make_insert_tlb spc,pte,prot,t1
iitlbt pte,prot
@@ -1514,7 +1526,7 @@ itlb_miss_20:
tlb_lock spc,ptp,pte,t0,t1,itlb_fault
update_accessed ptp,pte,t0,t1
- make_insert_tlb spc,pte,prot
+ make_insert_tlb spc,pte,prot,t1
f_extend pte,t1
@@ -1534,7 +1546,7 @@ naitlb_miss_20:
tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20
update_accessed ptp,pte,t0,t1
- make_insert_tlb spc,pte,prot
+ make_insert_tlb spc,pte,prot,t1
f_extend pte,t1
@@ -1566,7 +1578,7 @@ dbit_trap_20w:
tlb_lock spc,ptp,pte,t0,t1,dbit_fault
update_dirty ptp,pte,t1
- make_insert_tlb spc,pte,prot
+ make_insert_tlb spc,pte,prot,t1
idtlbt pte,prot
@@ -1610,7 +1622,7 @@ dbit_trap_20:
tlb_lock spc,ptp,pte,t0,t1,dbit_fault
update_dirty ptp,pte,t1
- make_insert_tlb spc,pte,prot
+ make_insert_tlb spc,pte,prot,t1
f_extend pte,t1
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index e7d64527aff9..75aa0db9f69e 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -69,7 +69,7 @@ $bss_loop:
stw,ma %arg2,4(%r1)
stw,ma %arg3,4(%r1)
- /* Initialize startup VM. Just map first 8/16 MB of memory */
+ /* Initialize startup VM. Just map first 16/32 MB of memory */
load32 PA(swapper_pg_dir),%r4
mtctl %r4,%cr24 /* Initialize kernel root pointer */
mtctl %r4,%cr25 /* Initialize user root pointer */
@@ -107,7 +107,7 @@ $bss_loop:
/* Now initialize the PTEs themselves. We use RWX for
* everything ... it will get remapped correctly later */
ldo 0+_PAGE_KERNEL_RWX(%r0),%r3 /* Hardwired 0 phys addr start */
- ldi (1<<(KERNEL_INITIAL_ORDER-PAGE_SHIFT)),%r11 /* PFN count */
+ load32 (1<<(KERNEL_INITIAL_ORDER-PAGE_SHIFT)),%r11 /* PFN count */
load32 PA(pg0),%r1
$pgt_fill_loop:
diff --git a/arch/parisc/kernel/pci.c b/arch/parisc/kernel/pci.c
index 64f2764a8cef..c99f3dde455c 100644
--- a/arch/parisc/kernel/pci.c
+++ b/arch/parisc/kernel/pci.c
@@ -171,24 +171,6 @@ void pcibios_set_master(struct pci_dev *dev)
}
-void __init pcibios_init_bus(struct pci_bus *bus)
-{
- struct pci_dev *dev = bus->self;
- unsigned short bridge_ctl;
-
- /* We deal only with pci controllers and pci-pci bridges. */
- if (!dev || (dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
- return;
-
- /* PCI-PCI bridge - set the cache line and default latency
- (32) for primary and secondary buses. */
- pci_write_config_byte(dev, PCI_SEC_LATENCY_TIMER, 32);
-
- pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &bridge_ctl);
- bridge_ctl |= PCI_BRIDGE_CTL_PARITY | PCI_BRIDGE_CTL_SERR;
- pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bridge_ctl);
-}
-
/*
* pcibios_align_resource() is called every time generic PCI code
* wants to generate a new address. The process of looking for
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 72a3c658ad7b..f7ea626e29c9 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -130,7 +130,16 @@ void __init setup_arch(char **cmdline_p)
printk(KERN_INFO "The 32-bit Kernel has started...\n");
#endif
- printk(KERN_INFO "Default page size is %dKB.\n", (int)(PAGE_SIZE / 1024));
+ printk(KERN_INFO "Kernel default page size is %d KB. Huge pages ",
+ (int)(PAGE_SIZE / 1024));
+#ifdef CONFIG_HUGETLB_PAGE
+ printk(KERN_CONT "enabled with %d MB physical and %d MB virtual size",
+ 1 << (REAL_HPAGE_SHIFT - 20), 1 << (HPAGE_SHIFT - 20));
+#else
+ printk(KERN_CONT "disabled");
+#endif
+ printk(KERN_CONT ".\n");
+
pdc_console_init();
@@ -377,6 +386,7 @@ arch_initcall(parisc_init);
void start_parisc(void)
{
extern void start_kernel(void);
+ extern void early_trap_init(void);
int ret, cpunum;
struct pdc_coproc_cfg coproc_cfg;
@@ -397,6 +407,8 @@ void start_parisc(void)
panic("must have an fpu to boot linux");
}
+ early_trap_init(); /* initialize checksum of fault_vector */
+
start_kernel();
// not reached
}
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index dc1ea796fd60..2264f68f3c2f 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -435,6 +435,55 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs, int in_syscall)
regs->gr[28]);
}
+/*
+ * Check how the syscall number gets loaded into %r20 within
+ * the delay branch in userspace and adjust as needed.
+ */
+
+static void check_syscallno_in_delay_branch(struct pt_regs *regs)
+{
+ u32 opcode, source_reg;
+ u32 __user *uaddr;
+ int err;
+
+ /* Usually we don't have to restore %r20 (the system call number)
+ * because it gets loaded in the delay slot of the branch external
+ * instruction via the ldi instruction.
+ * In some cases a register-to-register copy instruction might have
+ * been used instead, in which case we need to copy the syscall
+ * number into the source register before returning to userspace.
+ */
+
+ /* A syscall is just a branch, so all we have to do is fiddle the
+ * return pointer so that the ble instruction gets executed again.
+ */
+ regs->gr[31] -= 8; /* delayed branching */
+
+ /* Get assembler opcode of code in delay branch */
+ uaddr = (unsigned int *) ((regs->gr[31] & ~3) + 4);
+ err = get_user(opcode, uaddr);
+ if (err)
+ return;
+
+ /* Check if delay branch uses "ldi int,%r20" */
+ if ((opcode & 0xffff0000) == 0x34140000)
+ return; /* everything ok, just return */
+
+ /* Check if delay branch uses "nop" */
+ if (opcode == INSN_NOP)
+ return;
+
+ /* Check if delay branch uses "copy %rX,%r20" */
+ if ((opcode & 0xffe0ffff) == 0x08000254) {
+ source_reg = (opcode >> 16) & 31;
+ regs->gr[source_reg] = regs->gr[20];
+ return;
+ }
+
+ pr_warn("syscall restart: %s (pid %d): unexpected opcode 0x%08x\n",
+ current->comm, task_pid_nr(current), opcode);
+}
+
static inline void
syscall_restart(struct pt_regs *regs, struct k_sigaction *ka)
{
@@ -457,10 +506,7 @@ syscall_restart(struct pt_regs *regs, struct k_sigaction *ka)
}
/* fallthrough */
case -ERESTARTNOINTR:
- /* A syscall is just a branch, so all
- * we have to do is fiddle the return pointer.
- */
- regs->gr[31] -= 8; /* delayed branching */
+ check_syscallno_in_delay_branch(regs);
break;
}
}
@@ -510,15 +556,9 @@ insert_restart_trampoline(struct pt_regs *regs)
}
case -ERESTARTNOHAND:
case -ERESTARTSYS:
- case -ERESTARTNOINTR: {
- /* Hooray for delayed branching. We don't
- * have to restore %r20 (the system call
- * number) because it gets loaded in the delay
- * slot of the branch external instruction.
- */
- regs->gr[31] -= 8;
+ case -ERESTARTNOINTR:
+ check_syscallno_in_delay_branch(regs);
return;
- }
default:
break;
}
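The mask arithmetic in check_syscallno_in_delay_branch() is compact: a PA-RISC `copy %rX,%r20` encodes the source register in bits 16-20, so masking with 0xffe0ffff blanks that field before comparing against the %rX-independent template 0x08000254, and `(opcode >> 16) & 31` recovers X. A standalone check of that arithmetic (the register number is arbitrary):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t rx = 9;                      /* arbitrary source reg */
        uint32_t opcode = 0x08000254 | (rx << 16);

        if ((opcode & 0xffe0ffff) == 0x08000254)
            printf("matched copy %%r%u,%%r20\n", (opcode >> 16) & 31);
        return 0;
    }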
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 0b8d26d3ba43..3fbd7252a4b2 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -369,7 +369,7 @@ tracesys_exit:
ldo -16(%r30),%r29 /* Reference param save area */
#endif
ldo TASK_REGS(%r1),%r26
- bl do_syscall_trace_exit,%r2
+ BL do_syscall_trace_exit,%r2
STREG %r28,TASK_PT_GR28(%r1) /* save return value now */
ldo -THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* get task ptr */
LDREG TI_TASK(%r1), %r1
@@ -390,7 +390,7 @@ tracesys_sigexit:
#ifdef CONFIG_64BIT
ldo -16(%r30),%r29 /* Reference param save area */
#endif
- bl do_syscall_trace_exit,%r2
+ BL do_syscall_trace_exit,%r2
ldo TASK_REGS(%r1),%r26
ldil L%syscall_exit_rfi,%r1
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 78c3ef8c348d..d4ffcfbc9885 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -440,6 +440,7 @@
ENTRY_COMP(execveat)
ENTRY_SAME(membarrier)
ENTRY_SAME(userfaultfd)
+ ENTRY_SAME(mlock2) /* 345 */
.ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index b99b39f1da02..553b09855cfd 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -807,7 +807,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
}
-int __init check_ivt(void *iva)
+void __init initialize_ivt(const void *iva)
{
extern u32 os_hpmc_size;
extern const u32 os_hpmc[];
@@ -818,8 +818,8 @@ int __init check_ivt(void *iva)
u32 *hpmcp;
u32 length;
- if (strcmp((char *)iva, "cows can fly"))
- return -1;
+ if (strcmp((const char *)iva, "cows can fly"))
+ panic("IVT invalid");
ivap = (u32 *)iva;
@@ -839,28 +839,23 @@ int __init check_ivt(void *iva)
check += ivap[i];
ivap[5] = -check;
-
- return 0;
}
-#ifndef CONFIG_64BIT
-extern const void fault_vector_11;
-#endif
-extern const void fault_vector_20;
-void __init trap_init(void)
+/* early_trap_init() is called before we set up kernel mappings and
+ * write-protect the kernel */
+void __init early_trap_init(void)
{
- void *iva;
+ extern const void fault_vector_20;
- if (boot_cpu_data.cpu_type >= pcxu)
- iva = (void *) &fault_vector_20;
- else
-#ifdef CONFIG_64BIT
- panic("Can't boot 64-bit OS on PA1.1 processor!");
-#else
- iva = (void *) &fault_vector_11;
+#ifndef CONFIG_64BIT
+ extern const void fault_vector_11;
+ initialize_ivt(&fault_vector_11);
#endif
- if (check_ivt(iva))
- panic("IVT invalid");
+ initialize_ivt(&fault_vector_20);
+}
+
+void __init trap_init(void)
+{
}
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 0dacc5ca555a..308f29081d46 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -60,7 +60,7 @@ SECTIONS
EXIT_DATA
}
PERCPU_SECTION(8)
- . = ALIGN(PAGE_SIZE);
+ . = ALIGN(HUGEPAGE_SIZE);
__init_end = .;
/* freed after init ends here */
@@ -116,7 +116,7 @@ SECTIONS
* that we can properly leave these
* as writable
*/
- . = ALIGN(PAGE_SIZE);
+ . = ALIGN(HUGEPAGE_SIZE);
data_start = .;
EXCEPTION_TABLE(8)
@@ -135,8 +135,11 @@ SECTIONS
_edata = .;
/* BSS */
- BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 8)
+ BSS_SECTION(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE)
+
+ /* bootmap is allocated in setup_bootmem() directly behind bss. */
+ . = ALIGN(HUGEPAGE_SIZE);
_end = . ;
STABS_DEBUG
diff --git a/arch/parisc/mm/Makefile b/arch/parisc/mm/Makefile
index 758ceefb373a..134393de69d2 100644
--- a/arch/parisc/mm/Makefile
+++ b/arch/parisc/mm/Makefile
@@ -3,3 +3,4 @@
#
obj-y := init.o fault.o ioremap.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c
new file mode 100644
index 000000000000..f6fdc77a72bd
--- /dev/null
+++ b/arch/parisc/mm/hugetlbpage.c
@@ -0,0 +1,161 @@
+/*
+ * PARISC64 Huge TLB page support.
+ *
+ * This parisc implementation is heavily based on the SPARC and x86 code.
+ *
+ * Copyright (C) 2015 Helge Deller <deller@gmx.de>
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/sysctl.h>
+
+#include <asm/mman.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+
+
+unsigned long
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+
+ if (len & ~huge_page_mask(h))
+ return -EINVAL;
+ if (len > TASK_SIZE)
+ return -ENOMEM;
+
+ if (flags & MAP_FIXED)
+ if (prepare_hugepage_range(file, addr, len))
+ return -EINVAL;
+
+ if (addr)
+ addr = ALIGN(addr, huge_page_size(h));
+
+ /* we need to make sure the colouring is OK */
+ return arch_get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
+
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+ unsigned long addr, unsigned long sz)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte = NULL;
+
+ /* We must align the address, because our caller will run
+ * set_huge_pte_at() on whatever we return, which writes out
+ * all of the sub-ptes for the hugepage range. So we have
+ * to give it the first such sub-pte.
+ */
+ addr &= HPAGE_MASK;
+
+ pgd = pgd_offset(mm, addr);
+ pud = pud_alloc(mm, pgd, addr);
+ if (pud) {
+ pmd = pmd_alloc(mm, pud, addr);
+ if (pmd)
+ pte = pte_alloc_map(mm, NULL, pmd, addr);
+ }
+ return pte;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte = NULL;
+
+ addr &= HPAGE_MASK;
+
+ pgd = pgd_offset(mm, addr);
+ if (!pgd_none(*pgd)) {
+ pud = pud_offset(pgd, addr);
+ if (!pud_none(*pud)) {
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_none(*pmd))
+ pte = pte_offset_map(pmd, addr);
+ }
+ }
+ return pte;
+}
+
+/* Purge data and instruction TLB entries. Must be called holding
+ * the pa_tlb_lock. The TLB purge instructions are slow on SMP
+ * machines since the purge must be broadcast to all CPUs.
+ */
+static inline void purge_tlb_entries_huge(struct mm_struct *mm, unsigned long addr)
+{
+ int i;
+
+ /* We may use multiple physical huge pages (e.g. 2x1 MB) to emulate
+ * Linux standard huge pages (e.g. 2 MB) */
+ BUILD_BUG_ON(REAL_HPAGE_SHIFT > HPAGE_SHIFT);
+
+ addr &= HPAGE_MASK;
+ addr |= _HUGE_PAGE_SIZE_ENCODING_DEFAULT;
+
+ for (i = 0; i < (1 << (HPAGE_SHIFT-REAL_HPAGE_SHIFT)); i++) {
+ mtsp(mm->context, 1);
+ pdtlb(addr);
+ if (unlikely(split_tlb))
+ pitlb(addr);
+ addr += (1UL << REAL_HPAGE_SHIFT);
+ }
+}
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t entry)
+{
+ unsigned long addr_start;
+ int i;
+
+ addr &= HPAGE_MASK;
+ addr_start = addr;
+
+ for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
+ /* Directly write pte entry. We could call set_pte_at(mm, addr, ptep, entry)
+ * instead, but then we get double locking on pa_tlb_lock. */
+ *ptep = entry;
+ ptep++;
+
+ /* Drop the PAGE_SIZE/non-huge tlb entry */
+ purge_tlb_entries(mm, addr);
+
+ addr += PAGE_SIZE;
+ pte_val(entry) += PAGE_SIZE;
+ }
+
+ purge_tlb_entries_huge(mm, addr_start);
+}
+
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ pte_t entry;
+
+ entry = *ptep;
+ set_huge_pte_at(mm, addr, ptep, __pte(0));
+
+ return entry;
+}
+
+int pmd_huge(pmd_t pmd)
+{
+ return 0;
+}
+
+int pud_huge(pud_t pud)
+{
+ return 0;
+}
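The loop bounds in this new file are worth pinning down: set_huge_pte_at() writes 2^HUGETLB_PAGE_ORDER base PTEs per Linux huge page, while purge_tlb_entries_huge() issues one purge per hardware huge TLB entry, of which there are 2^(HPAGE_SHIFT - REAL_HPAGE_SHIFT). Using the 2 MB-emulated-by-2x1 MB example from the comment above (illustrative values, not every configuration):

    #include <stdio.h>

    int main(void)
    {
        int page_shift = 12;       /* 4 kB base pages */
        int hpage_shift = 21;      /* 2 MB Linux huge page (example) */
        int real_hpage_shift = 20; /* 1 MB hardware huge TLB entry */

        printf("base PTEs written per huge page:   %d\n",
               1 << (hpage_shift - page_shift));        /* 512 */
        printf("hardware TLB purges per huge page: %d\n",
               1 << (hpage_shift - real_hpage_shift));  /* 2 */
        return 0;
    }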
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index c5fec4890fdf..1b366c477687 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -409,15 +409,11 @@ static void __init map_pages(unsigned long start_vaddr,
unsigned long vaddr;
unsigned long ro_start;
unsigned long ro_end;
- unsigned long fv_addr;
- unsigned long gw_addr;
- extern const unsigned long fault_vector_20;
- extern void * const linux_gateway_page;
+ unsigned long kernel_end;
ro_start = __pa((unsigned long)_text);
ro_end = __pa((unsigned long)&data_start);
- fv_addr = __pa((unsigned long)&fault_vector_20) & PAGE_MASK;
- gw_addr = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK;
+ kernel_end = __pa((unsigned long)&_end);
end_paddr = start_paddr + size;
@@ -475,24 +471,25 @@ static void __init map_pages(unsigned long start_vaddr,
for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++, pg_table++) {
pte_t pte;
- /*
- * Map the fault vector writable so we can
- * write the HPMC checksum.
- */
if (force)
pte = __mk_pte(address, pgprot);
- else if (parisc_text_address(vaddr) &&
- address != fv_addr)
+ else if (parisc_text_address(vaddr)) {
pte = __mk_pte(address, PAGE_KERNEL_EXEC);
+ if (address >= ro_start && address < kernel_end)
+ pte = pte_mkhuge(pte);
+ }
else
#if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
- if (address >= ro_start && address < ro_end
- && address != fv_addr
- && address != gw_addr)
- pte = __mk_pte(address, PAGE_KERNEL_RO);
- else
+ if (address >= ro_start && address < ro_end) {
+ pte = __mk_pte(address, PAGE_KERNEL_EXEC);
+ pte = pte_mkhuge(pte);
+ } else
#endif
+ {
pte = __mk_pte(address, pgprot);
+ if (address >= ro_start && address < kernel_end)
+ pte = pte_mkhuge(pte);
+ }
if (address >= end_paddr) {
if (force)
@@ -536,15 +533,12 @@ void free_initmem(void)
/* force the kernel to see the new TLB entries */
__flush_tlb_range(0, init_begin, init_end);
- /* Attempt to catch anyone trying to execute code here
- * by filling the page with BRK insns.
- */
- memset((void *)init_begin, 0x00, init_end - init_begin);
+
/* finally dump all the instructions which were cached, since the
* pages are no-longer executable */
flush_icache_range(init_begin, init_end);
- free_initmem_default(-1);
+ free_initmem_default(POISON_FREE_INITMEM);
/* set up a new led state on systems shipped LED State panel */
pdc_chassis_send_status(PDC_CHASSIS_DIRECT_BCOMPLETE);
@@ -728,8 +722,8 @@ static void __init pagetable_init(void)
unsigned long size;
start_paddr = pmem_ranges[range].start_pfn << PAGE_SHIFT;
- end_paddr = start_paddr + (pmem_ranges[range].pages << PAGE_SHIFT);
size = pmem_ranges[range].pages << PAGE_SHIFT;
+ end_paddr = start_paddr + size;
map_pages((unsigned long)__va(start_paddr), start_paddr,
size, PAGE_KERNEL, 0);
diff --git a/arch/powerpc/boot/dts/sbc8641d.dts b/arch/powerpc/boot/dts/sbc8641d.dts
index 631ede72e226..68f0ed7626bd 100644
--- a/arch/powerpc/boot/dts/sbc8641d.dts
+++ b/arch/powerpc/boot/dts/sbc8641d.dts
@@ -227,23 +227,15 @@
reg = <0x520 0x20>;
phy0: ethernet-phy@1f {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
reg = <0x1f>;
};
phy1: ethernet-phy@0 {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
reg = <0>;
};
phy2: ethernet-phy@1 {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
reg = <1>;
};
phy3: ethernet-phy@2 {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
reg = <2>;
};
tbi0: tbi-phy@11 {
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index 0eca6efc0631..a7af5fb7b914 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -34,7 +34,7 @@
#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
-#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#ifdef __SUBARCH_HAS_LWSYNC
# define SMPWMB LWSYNC
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index a908ada8e0a5..2220f7a60def 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -108,6 +108,7 @@
#define MSR_TS_T __MASK(MSR_TS_T_LG) /* Transaction Transactional */
#define MSR_TS_MASK (MSR_TS_T | MSR_TS_S) /* Transaction State bits */
#define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */
+#define MSR_TM_RESV(x) (((x) & MSR_TS_MASK) == MSR_TS_MASK) /* Reserved */
#define MSR_TM_TRANSACTIONAL(x) (((x) & MSR_TS_MASK) == MSR_TS_T)
#define MSR_TM_SUSPENDED(x) (((x) & MSR_TS_MASK) == MSR_TS_S)
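MSR_TM_RESV() is new here; the signal-return checks use it to reject a context that sets both TS bits at once, a combination the architecture reserves. The field logic, reduced to a two-bit toy (the real bit positions differ):

    #include <stdio.h>

    #define MSR_TS_S (1u << 0)                /* toy positions */
    #define MSR_TS_T (1u << 1)
    #define MSR_TS_MASK (MSR_TS_T | MSR_TS_S)
    #define MSR_TM_SUSPENDED(x) (((x) & MSR_TS_MASK) == MSR_TS_S)
    #define MSR_TM_RESV(x)      (((x) & MSR_TS_MASK) == MSR_TS_MASK)

    int main(void)
    {
        printf("suspended: %d\n", MSR_TM_SUSPENDED(MSR_TS_S));       /* 1 */
        printf("reserved:  %d\n", MSR_TM_RESV(MSR_TS_S | MSR_TS_T)); /* 1 */
        return 0;
    }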
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index c9e26cb264f4..5654ece02c0d 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -370,15 +370,16 @@ COMPAT_SYS(execveat)
PPC64ONLY(switch_endian)
SYSCALL_SPU(userfaultfd)
SYSCALL_SPU(membarrier)
-SYSCALL(semop)
-SYSCALL(semget)
-COMPAT_SYS(semctl)
-COMPAT_SYS(semtimedop)
-COMPAT_SYS(msgsnd)
-COMPAT_SYS(msgrcv)
-SYSCALL(msgget)
-COMPAT_SYS(msgctl)
-COMPAT_SYS(shmat)
-SYSCALL(shmdt)
-SYSCALL(shmget)
-COMPAT_SYS(shmctl)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(ni_syscall)
+SYSCALL(mlock2)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 6d8f8023ac27..4b6b8ace18e0 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
#include <uapi/asm/unistd.h>
-#define __NR_syscalls 378
+#define __NR_syscalls 379
#define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index 81579e93c659..12a05652377a 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -388,17 +388,6 @@
#define __NR_switch_endian 363
#define __NR_userfaultfd 364
#define __NR_membarrier 365
-#define __NR_semop 366
-#define __NR_semget 367
-#define __NR_semctl 368
-#define __NR_semtimedop 369
-#define __NR_msgsnd 370
-#define __NR_msgrcv 371
-#define __NR_msgget 372
-#define __NR_msgctl 373
-#define __NR_shmat 374
-#define __NR_shmdt 375
-#define __NR_shmget 376
-#define __NR_shmctl 377
+#define __NR_mlock2 378
#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 80dfe8965df9..8d14feb40f12 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -590,16 +590,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
eeh_ops->configure_bridge(pe);
eeh_pe_restore_bars(pe);
- /*
- * If it's PHB PE, the frozen state on all available PEs should have
- * been cleared by the PHB reset. Otherwise, we unfreeze the PE and its
- * child PEs because they might be in frozen state.
- */
- if (!(pe->type & EEH_PE_PHB)) {
- rc = eeh_clear_pe_frozen_state(pe, false);
- if (rc)
- return rc;
- }
+ /* Clear frozen state */
+ rc = eeh_clear_pe_frozen_state(pe, false);
+ if (rc)
+ return rc;
/* Give the system 5 seconds to finish running the user-space
* hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 75b6676c1a0b..646bf4d222c1 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -551,6 +551,24 @@ static void tm_reclaim_thread(struct thread_struct *thr,
msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1;
}
+ /*
+ * Use the current MSR TM suspended bit to track if we have
+ * checkpointed state outstanding.
+ * On signal delivery, we'd normally reclaim the checkpointed
+ * state to obtain the stack pointer (see: get_tm_stackpointer()).
+ * This will then directly return to userspace without going
+ * through __switch_to(). However, if the stack frame is bad,
+ * we need to exit this thread, which calls __switch_to(), which
+ * will again attempt to reclaim the already saved tm state.
+ * Hence we need to check that we've not already reclaimed
+ * this state.
+ * We do this using the current MSR, rather than tracking it in
+ * some specific thread_struct bit, as it has the additional
+ * benefit of catching a potential TM Bad Thing exception.
+ */
+ if (!MSR_TM_SUSPENDED(mfmsr()))
+ return;
+
tm_reclaim(thr, thr->regs->msr, cause);
/* Having done the reclaim, we now have the checkpointed
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 0dbee465af7a..ef7c24e84a62 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -875,6 +875,15 @@ static long restore_tm_user_regs(struct pt_regs *regs,
return 1;
#endif /* CONFIG_SPE */
+ /* Get the top half of the MSR from the user context */
+ if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
+ return 1;
+ msr_hi <<= 32;
+ /* If TM bits are set to the reserved value, it's an invalid context */
+ if (MSR_TM_RESV(msr_hi))
+ return 1;
+ /* Pull in the MSR TM bits from the user context */
+ regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK);
/* Now, recheckpoint. This loads up all of the checkpointed (older)
* registers, including FP and V[S]Rs. After recheckpointing, the
* transactional versions should be loaded.
@@ -884,11 +893,6 @@ static long restore_tm_user_regs(struct pt_regs *regs,
current->thread.tm_texasr |= TEXASR_FS;
/* This loads the checkpointed FP/VEC state, if used */
tm_recheckpoint(&current->thread, msr);
- /* Get the top half of the MSR */
- if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
- return 1;
- /* Pull in MSR TM from user context */
- regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK);
/* This loads the speculative FP/VEC state, if used */
if (msr & MSR_FP) {
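On the 32-bit signal path the 64-bit MSR is split across 32-bit gregs, so the TS bits sit in the high word; the point of the reordering is to fetch and vet that half before tm_recheckpoint() consumes the context rather than after. A hedged sketch of the assemble-and-reject step (restore_ts() is a made-up stand-in, not the kernel function):

    #include <stdint.h>
    #include <stdio.h>

    #define MSR_TS_MASK    (3ULL << 33)
    #define MSR_TM_RESV(x) (((x) & MSR_TS_MASK) == MSR_TS_MASK)

    static int restore_ts(uint64_t *msr, uint32_t greg_msr_hi)
    {
            uint64_t msr_hi = (uint64_t)greg_msr_hi << 32;

            if (MSR_TM_RESV(msr_hi))
                    return 1;  /* reserved TS encoding: invalid context */
            *msr = (*msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK);
            return 0;
    }

    int main(void)
    {
            uint64_t msr = 0;

            /* 0x6 in the high word sets MSR bits 33 and 34: rejected */
            printf("%d\n", restore_ts(&msr, 0x6));
            return 0;
    }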
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 20756dfb9f34..c676ecec0869 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -438,6 +438,10 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
/* get MSR separately, transfer the LE bit if doing signal return */
err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+ /* Don't allow reserved mode. */
+ if (MSR_TM_RESV(msr))
+ return -EINVAL;
+
/* pull in MSR TM from user context */
regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 54b45b73195f..a7352b59e6f9 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -224,6 +224,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
{
+ /*
+ * Check for illegal transactional state bit combination
+ * and if we find it, force the TS field to a safe state.
+ */
+ if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
+ msr &= ~MSR_TS_MASK;
vcpu->arch.shregs.msr = msr;
kvmppc_end_cede(vcpu);
}
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 04782164ee67..2d66a8446198 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -78,18 +78,9 @@ static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image,
PPC_LI(r_X, 0);
}
- switch (filter[0].code) {
- case BPF_RET | BPF_K:
- case BPF_LD | BPF_W | BPF_LEN:
- case BPF_LD | BPF_W | BPF_ABS:
- case BPF_LD | BPF_H | BPF_ABS:
- case BPF_LD | BPF_B | BPF_ABS:
- /* first instruction sets A register (or is RET 'constant') */
- break;
- default:
- /* make sure we dont leak kernel information to user */
+ /* make sure we don't leak kernel information to userspace */
+ if (bpf_needs_clear_a(&filter[0]))
PPC_LI(r_A, 0);
- }
}
static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
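bpf_needs_clear_a() hoists the old switch into a shared helper: the prologue zeroes r_A unless the filter's first instruction writes A (or returns a constant), so stale register contents never reach userspace. A simplified standalone rendering of the predicate; the in-tree helper in filter.h also special-cases an ancillary ABS load, so treat this as a sketch:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BPF_LD  0x00
    #define BPF_RET 0x06
    #define BPF_W   0x00
    #define BPF_H   0x08
    #define BPF_B   0x10
    #define BPF_K   0x00
    #define BPF_ABS 0x20
    #define BPF_LEN 0x80

    struct sock_filter_ { uint16_t code; uint32_t k; };

    static bool needs_clear_a(const struct sock_filter_ *first)
    {
            switch (first->code) {
            case BPF_RET | BPF_K:
            case BPF_LD | BPF_W | BPF_LEN:
            case BPF_LD | BPF_W | BPF_ABS:
            case BPF_LD | BPF_H | BPF_ABS:
            case BPF_LD | BPF_B | BPF_ABS:
                    return false;  /* first insn sets A or returns a constant */
            default:
                    return true;   /* A may be read before written: zero it */
            }
    }

    int main(void)
    {
            struct sock_filter_ f = { BPF_RET | BPF_K, 0 };

            printf("%d\n", needs_clear_a(&f));  /* 0: no clearing needed */
            return 0;
    }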
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index 6ccfb6c1c707..e505223b4ec5 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -43,11 +43,34 @@ static unsigned int opal_irq_count;
static unsigned int *opal_irqs;
static void opal_handle_irq_work(struct irq_work *work);
-static __be64 last_outstanding_events;
+static u64 last_outstanding_events;
static struct irq_work opal_event_irq_work = {
.func = opal_handle_irq_work,
};
+void opal_handle_events(uint64_t events)
+{
+ int virq, hwirq = 0;
+ u64 mask = opal_event_irqchip.mask;
+
+ if (!in_irq() && (events & mask)) {
+ last_outstanding_events = events;
+ irq_work_queue(&opal_event_irq_work);
+ return;
+ }
+
+ while (events & mask) {
+ hwirq = fls64(events) - 1;
+ if (BIT_ULL(hwirq) & mask) {
+ virq = irq_find_mapping(opal_event_irqchip.domain,
+ hwirq);
+ if (virq)
+ generic_handle_irq(virq);
+ }
+ events &= ~BIT_ULL(hwirq);
+ }
+}
+
static void opal_event_mask(struct irq_data *d)
{
clear_bit(d->hwirq, &opal_event_irqchip.mask);
@@ -55,9 +78,21 @@ static void opal_event_mask(struct irq_data *d)
static void opal_event_unmask(struct irq_data *d)
{
+ __be64 events;
+
set_bit(d->hwirq, &opal_event_irqchip.mask);
- opal_poll_events(&last_outstanding_events);
+ opal_poll_events(&events);
+ last_outstanding_events = be64_to_cpu(events);
+
+ /*
+ * We can't just handle the events now with opal_handle_events().
+ * If we did we would deadlock when opal_event_unmask() is called from
+ * handle_level_irq() with the irq descriptor lock held, because
+ * calling opal_handle_events() would call generic_handle_irq() and
+ * then handle_level_irq() which would try to take the descriptor lock
+ * again. Instead queue the events for later.
+ */
if (last_outstanding_events & opal_event_irqchip.mask)
/* Need to retrigger the interrupt */
irq_work_queue(&opal_event_irq_work);
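Two mechanisms interact here: opal_handle_events() drains the highest pending bit first via fls64(), and the unmask path must defer through irq_work because handle_level_irq() calls it with the irq descriptor lock held, as the new comment explains. A userspace sketch of the bit-walk, with printf standing in for generic_handle_irq():

    #include <stdint.h>
    #include <stdio.h>

    static int fls64_(uint64_t x)
    {
            return x ? 64 - __builtin_clzll(x) : 0;
    }

    static void handle_events(uint64_t events, uint64_t mask)
    {
            while (events & mask) {
                    int hwirq = fls64_(events) - 1;

                    if ((1ULL << hwirq) & mask)
                            printf("dispatch hwirq %d\n", hwirq);
                    events &= ~(1ULL << hwirq);  /* masked bits are dropped */
            }
    }

    int main(void)
    {
            handle_events(0x15, 0x05);  /* bits 4,2,0 pending; 2 and 0 unmasked */
            return 0;
    }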
@@ -96,29 +131,6 @@ static int opal_event_map(struct irq_domain *d, unsigned int irq,
return 0;
}
-void opal_handle_events(uint64_t events)
-{
- int virq, hwirq = 0;
- u64 mask = opal_event_irqchip.mask;
-
- if (!in_irq() && (events & mask)) {
- last_outstanding_events = events;
- irq_work_queue(&opal_event_irq_work);
- return;
- }
-
- while (events & mask) {
- hwirq = fls64(events) - 1;
- if (BIT_ULL(hwirq) & mask) {
- virq = irq_find_mapping(opal_event_irqchip.domain,
- hwirq);
- if (virq)
- generic_handle_irq(virq);
- }
- events &= ~BIT_ULL(hwirq);
- }
-}
-
static irqreturn_t opal_interrupt(int irq, void *data)
{
__be64 events;
@@ -131,7 +143,7 @@ static irqreturn_t opal_interrupt(int irq, void *data)
static void opal_handle_irq_work(struct irq_work *work)
{
- opal_handle_events(be64_to_cpu(last_outstanding_events));
+ opal_handle_events(last_outstanding_events);
}
static int opal_event_match(struct irq_domain *h, struct device_node *node,
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 4296d55e88f3..57cffb80bc36 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -278,7 +278,7 @@ static void opal_handle_message(void)
/* Sanity check */
if (type >= OPAL_MSG_TYPE_MAX) {
- pr_warning("%s: Unknown message type: %u\n", __func__, type);
+ pr_warn_once("%s: Unknown message type: %u\n", __func__, type);
return;
}
opal_message_do_notify(type, (void *)&msg);
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index d68e11e0df5e..7ffd0b19135c 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -36,7 +36,7 @@
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
-#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define smp_store_release(p, v) \
do { \
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 0c5d8ee657f0..d1e7b0a0feeb 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -312,6 +312,7 @@ extern void css_schedule_reprobe(void);
extern void reipl_ccw_dev(struct ccw_dev_id *id);
struct cio_iplinfo {
+ u8 ssid;
u16 devno;
int is_qdio;
};
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 3ad48f22de78..bab6739a1154 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -206,9 +206,16 @@ do { \
} while (0)
#endif /* CONFIG_COMPAT */
-extern unsigned long mmap_rnd_mask;
-
-#define STACK_RND_MASK (test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask)
+/*
+ * Cache aliasing on the latest machines calls for a mapping granularity
+ * of 512KB. For 64-bit processes use a 512KB alignment and a randomization
+ * of up to 1GB. For 31-bit processes the virtual address space is limited;
+ * use no alignment and limit the randomization to 8MB.
+ */
+#define BRK_RND_MASK (is_32bit_task() ? 0x7ffUL : 0x3ffffUL)
+#define MMAP_RND_MASK (is_32bit_task() ? 0x7ffUL : 0x3ff80UL)
+#define MMAP_ALIGN_MASK (is_32bit_task() ? 0 : 0x7fUL)
+#define STACK_RND_MASK MMAP_RND_MASK
#define ARCH_DLINFO \
do { \
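The masks count pages (PAGE_SHIFT is 12 here): 0x7ff pages span 8MB, 0x3ffff pages span 1GB, and 0x3ff80 keeps the low seven page bits clear so 64-bit mmap randomization lands on the 512KB grid defined by MMAP_ALIGN_MASK. The arithmetic, checked:

    #include <stdio.h>

    int main(void)
    {
            unsigned long shift = 12;  /* PAGE_SHIFT */

            printf("31-bit span: %lu MB\n", ((0x7ffUL + 1) << shift) >> 20);
            printf("64-bit span: %lu GB\n", ((0x3ffffUL + 1) << shift) >> 30);
            printf("align unit : %lu KB\n", ((0x7fUL + 1) << shift) >> 10);
            return 0;  /* prints 8 MB, 1 GB, 512 KB */
    }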
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 39ae6a359747..86634e71b69f 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -64,7 +64,8 @@ struct ipl_block_fcp {
struct ipl_block_ccw {
u8 reserved1[84];
- u8 reserved2[2];
+ u16 reserved2 : 13;
+ u8 ssid : 3;
u16 devno;
u8 vm_flags;
u8 reserved3[3];
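The two formerly reserved bytes become a 13-bit pad plus a 3-bit ssid; since s390 is big-endian and bitfields fill from the most significant bit, the ssid lands in the low three bits of that halfword. A shift/mask rendering of the same extraction (explicit on purpose, as bitfield packing is ABI-specific):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint16_t halfword = 0x0005;     /* reserved2:13 = 0, ssid:3 = 5 */
            uint8_t ssid = halfword & 0x7;  /* low three bits */

            printf("ssid=%u\n", ssid);
            return 0;
    }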
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 7a7abf1a5537..1aac41e83ea1 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -195,5 +195,7 @@ void zpci_dma_exit_device(struct zpci_dev *);
void dma_free_seg_table(unsigned long);
unsigned long *dma_alloc_cpu_table(void);
void dma_cleanup_tables(unsigned long *);
-void dma_update_cpu_trans(unsigned long *, void *, dma_addr_t, int);
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr);
+void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags);
+
#endif
diff --git a/arch/s390/include/asm/trace/diag.h b/arch/s390/include/asm/trace/diag.h
index 776f307960cc..cc6cfe7889da 100644
--- a/arch/s390/include/asm/trace/diag.h
+++ b/arch/s390/include/asm/trace/diag.h
@@ -19,7 +19,7 @@
#define TRACE_INCLUDE_PATH asm/trace
#define TRACE_INCLUDE_FILE diag
-TRACE_EVENT(diagnose,
+TRACE_EVENT(s390_diagnose,
TP_PROTO(unsigned short nr),
TP_ARGS(nr),
TP_STRUCT__entry(
@@ -32,9 +32,9 @@ TRACE_EVENT(diagnose,
);
#ifdef CONFIG_TRACEPOINTS
-void trace_diagnose_norecursion(int diag_nr);
+void trace_s390_diagnose_norecursion(int diag_nr);
#else
-static inline void trace_diagnose_norecursion(int diag_nr) { }
+static inline void trace_s390_diagnose_norecursion(int diag_nr) { }
#endif
#endif /* _TRACE_S390_DIAG_H */
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index a848adba1504..34ec202472c6 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -192,14 +192,14 @@
#define __NR_set_tid_address 252
#define __NR_fadvise64 253
#define __NR_timer_create 254
-#define __NR_timer_settime (__NR_timer_create+1)
-#define __NR_timer_gettime (__NR_timer_create+2)
-#define __NR_timer_getoverrun (__NR_timer_create+3)
-#define __NR_timer_delete (__NR_timer_create+4)
-#define __NR_clock_settime (__NR_timer_create+5)
-#define __NR_clock_gettime (__NR_timer_create+6)
-#define __NR_clock_getres (__NR_timer_create+7)
-#define __NR_clock_nanosleep (__NR_timer_create+8)
+#define __NR_timer_settime 255
+#define __NR_timer_gettime 256
+#define __NR_timer_getoverrun 257
+#define __NR_timer_delete 258
+#define __NR_clock_settime 259
+#define __NR_clock_gettime 260
+#define __NR_clock_getres 261
+#define __NR_clock_nanosleep 262
/* Number 263 is reserved for vserver */
#define __NR_statfs64 265
#define __NR_fstatfs64 266
@@ -309,7 +309,8 @@
#define __NR_recvfrom 371
#define __NR_recvmsg 372
#define __NR_shutdown 373
-#define NR_syscalls 374
+#define __NR_mlock2 374
+#define NR_syscalls 375
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index 09f194052df3..fac4eeddef91 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -176,3 +176,4 @@ COMPAT_SYSCALL_WRAP4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
COMPAT_SYSCALL_WRAP3(getsockname, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
+COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index f98766ede4e1..48b37b8357e6 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -121,14 +121,14 @@ device_initcall(show_diag_stat_init);
void diag_stat_inc(enum diag_stat_enum nr)
{
this_cpu_inc(diag_stat.counter[nr]);
- trace_diagnose(diag_map[nr].code);
+ trace_s390_diagnose(diag_map[nr].code);
}
EXPORT_SYMBOL(diag_stat_inc);
void diag_stat_inc_norecursion(enum diag_stat_enum nr)
{
this_cpu_inc(diag_stat.counter[nr]);
- trace_diagnose_norecursion(diag_map[nr].code);
+ trace_s390_diagnose_norecursion(diag_map[nr].code);
}
EXPORT_SYMBOL(diag_stat_inc_norecursion);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 8140d10c6785..6e72961608f0 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -1920,16 +1920,23 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
}
if (separator)
ptr += sprintf(ptr, "%c", separator);
+ /*
+ * Use four '%' characters below because of the
+ * following two conversions:
+ *
+ * 1) sprintf: %%%%r -> %%r
+ * 2) printk : %%r -> %r
+ */
if (operand->flags & OPERAND_GPR)
- ptr += sprintf(ptr, "%%r%i", value);
+ ptr += sprintf(ptr, "%%%%r%i", value);
else if (operand->flags & OPERAND_FPR)
- ptr += sprintf(ptr, "%%f%i", value);
+ ptr += sprintf(ptr, "%%%%f%i", value);
else if (operand->flags & OPERAND_AR)
- ptr += sprintf(ptr, "%%a%i", value);
+ ptr += sprintf(ptr, "%%%%a%i", value);
else if (operand->flags & OPERAND_CR)
- ptr += sprintf(ptr, "%%c%i", value);
+ ptr += sprintf(ptr, "%%%%c%i", value);
else if (operand->flags & OPERAND_VR)
- ptr += sprintf(ptr, "%%v%i", value);
+ ptr += sprintf(ptr, "%%%%v%i", value);
else if (operand->flags & OPERAND_PCREL)
ptr += sprintf(ptr, "%lx", (signed int) value
+ addr);
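The disassembly buffer is formatted twice, as the new comment says: print_insn() sprintf()s it, and the caller later passes the result to printk() as a format string, so one literal '%' in the output needs four in the source. A userspace demonstration, with a second sprintf() standing in for printk():

    #include <stdio.h>

    int main(void)
    {
            char stage1[32], stage2[32];

            sprintf(stage1, "%%%%r%i", 13);  /* first pass:  "%%r13" */
            sprintf(stage2, stage1, 0);      /* second pass: "%r13"  */
            printf("%s\n%s\n", stage1, stage2);
            return 0;
    }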
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 1255c6c5353e..301ee9c70688 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -26,6 +26,7 @@
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/page.h>
+#include <asm/ptrace.h>
#define ARCH_OFFSET 4
@@ -59,19 +60,6 @@ __HEAD
.long 0x020006e0,0x20000050
.org 0x200
-#
-# subroutine to set architecture mode
-#
-.Lsetmode:
- mvi __LC_AR_MODE_ID,1 # set esame flag
- slr %r0,%r0 # set cpuid to zero
- lhi %r1,2 # mode 2 = esame (dump)
- sigp %r1,%r0,0x12 # switch to esame mode
- bras %r13,0f
- .fill 16,4,0x0
-0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
- sam31 # switch to 31 bit addressing mode
- br %r14
#
# subroutine to wait for end I/O
@@ -159,7 +147,14 @@ __HEAD
.long 0x02200050,0x00000000
iplstart:
- bas %r14,.Lsetmode # Immediately switch to 64 bit mode
+ mvi __LC_AR_MODE_ID,1 # set esame flag
+ slr %r0,%r0 # set cpuid to zero
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,0x12 # switch to esame mode
+ bras %r13,0f
+ .fill 16,4,0x0
+0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
+ sam31 # switch to 31 bit addressing mode
lh %r1,0xb8 # test if subchannel number
bct %r1,.Lnoload # is valid
l %r1,0xb8 # load ipl subchannel number
@@ -269,71 +264,6 @@ iplstart:
.Lcpuid:.fill 8,1,0
#
-# SALIPL loader support. Based on a patch by Rob van der Heij.
-# This entry point is called directly from the SALIPL loader and
-# doesn't need a builtin ipl record.
-#
- .org 0x800
-ENTRY(start)
- stm %r0,%r15,0x07b0 # store registers
- bas %r14,.Lsetmode # Immediately switch to 64 bit mode
- basr %r12,%r0
-.base:
- l %r11,.parm
- l %r8,.cmd # pointer to command buffer
-
- ltr %r9,%r9 # do we have SALIPL parameters?
- bp .sk8x8
-
- mvc 0(64,%r8),0x00b0 # copy saved registers
- xc 64(240-64,%r8),0(%r8) # remainder of buffer
- tr 0(64,%r8),.lowcase
- b .gotr
-.sk8x8:
- mvc 0(240,%r8),0(%r9) # copy iplparms into buffer
-.gotr:
- slr %r0,%r0
- st %r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11)
- st %r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11)
- j startup # continue with startup
-.cmd: .long COMMAND_LINE # address of command line buffer
-.parm: .long PARMAREA
-.lowcase:
- .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
- .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
- .byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17
- .byte 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f
- .byte 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27
- .byte 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f
- .byte 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37
- .byte 0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f
- .byte 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47
- .byte 0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f
- .byte 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57
- .byte 0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f
- .byte 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67
- .byte 0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f
- .byte 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77
- .byte 0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f
-
- .byte 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87
- .byte 0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f
- .byte 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97
- .byte 0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f
- .byte 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7
- .byte 0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf
- .byte 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7
- .byte 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf
- .byte 0xc0,0x81,0x82,0x83,0x84,0x85,0x86,0x87 # .abcdefg
- .byte 0x88,0x89,0xca,0xcb,0xcc,0xcd,0xce,0xcf # hi
- .byte 0xd0,0x91,0x92,0x93,0x94,0x95,0x96,0x97 # .jklmnop
- .byte 0x98,0x99,0xda,0xdb,0xdc,0xdd,0xde,0xdf # qr
- .byte 0xe0,0xe1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7 # ..stuvwx
- .byte 0xa8,0xa9,0xea,0xeb,0xec,0xed,0xee,0xef # yz
- .byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7
- .byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
-
-#
# startup-code at 0x10000, running in absolute addressing mode
# this is called either by the ipl loader or directly by PSW restart
# or linload or SALIPL
@@ -364,7 +294,7 @@ ENTRY(startup_kdump)
bras %r13,0f
.fill 16,4,0x0
0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
- sam31 # switch to 31 bit addressing mode
+ sam64 # switch to 64 bit addressing mode
basr %r13,0 # get base
.LPG0:
xc 0x200(256),0x200 # partially clear lowcore
@@ -395,7 +325,7 @@ ENTRY(startup_kdump)
jnz 1b
j 4f
2: l %r15,.Lstack-.LPG0(%r13)
- ahi %r15,-96
+ ahi %r15,-STACK_FRAME_OVERHEAD
la %r2,.Lals_string-.LPG0(%r13)
l %r3,.Lsclp_print-.LPG0(%r13)
basr %r14,%r3
@@ -429,8 +359,7 @@ ENTRY(startup_kdump)
.long 1, 0xc0000000
#endif
4:
- /* Continue with 64bit startup code in head64.S */
- sam64 # switch to 64 bit mode
+ /* Continue with startup code in head64.S */
jg startup_continue
.align 8
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index f6d8acd7e136..b1f0a90f933b 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -121,6 +121,7 @@ static char *dump_type_str(enum dump_type type)
* Must be in data section since the bss section
* is not cleared when these are accessed.
*/
+static u8 ipl_ssid __attribute__((__section__(".data"))) = 0;
static u16 ipl_devno __attribute__((__section__(".data"))) = 0;
u32 ipl_flags __attribute__((__section__(".data"))) = 0;
@@ -197,6 +198,33 @@ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \
return snprintf(page, PAGE_SIZE, _format, ##args); \
}
+#define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ unsigned long long ssid, devno; \
+ \
+ if (sscanf(buf, "0.%llx.%llx\n", &ssid, &devno) != 2) \
+ return -EINVAL; \
+ \
+ if (ssid > __MAX_SSID || devno > __MAX_SUBCHANNEL) \
+ return -EINVAL; \
+ \
+ _ipl_blk.ssid = ssid; \
+ _ipl_blk.devno = devno; \
+ return len; \
+}
+
+#define DEFINE_IPL_CCW_ATTR_RW(_prefix, _name, _ipl_blk) \
+IPL_ATTR_SHOW_FN(_prefix, _name, "0.%x.%04x\n", \
+ _ipl_blk.ssid, _ipl_blk.devno); \
+IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk); \
+static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
+ __ATTR(_name, (S_IRUGO | S_IWUSR), \
+ sys_##_prefix##_##_name##_show, \
+ sys_##_prefix##_##_name##_store) \
+
#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \
IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value) \
static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
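The new store side accepts the same "0.<ssid>.<devno>" bus-id syntax the show side prints, with range checks before the block is updated. A standalone sketch of the parse; MAX_SSID/MAX_SUBCHANNEL are stand-ins for the kernel's __MAX_SSID/__MAX_SUBCHANNEL:

    #include <stdio.h>

    #define MAX_SSID       3
    #define MAX_SUBCHANNEL 0xffff

    static int parse_busid(const char *buf, unsigned long long *ssid,
                           unsigned long long *devno)
    {
            if (sscanf(buf, "0.%llx.%llx", ssid, devno) != 2)
                    return -1;
            if (*ssid > MAX_SSID || *devno > MAX_SUBCHANNEL)
                    return -1;
            return 0;
    }

    int main(void)
    {
            unsigned long long ssid, devno;

            if (!parse_busid("0.1.5e2b\n", &ssid, &devno))
                    printf("0.%llx.%04llx\n", ssid, devno);  /* 0.1.5e2b */
            return 0;
    }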
@@ -395,7 +423,7 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj,
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- return sprintf(page, "0.0.%04x\n", ipl_devno);
+ return sprintf(page, "0.%x.%04x\n", ipl_ssid, ipl_devno);
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
return sprintf(page, "0.0.%04x\n", ipl->ipl_info.fcp.devno);
@@ -687,21 +715,14 @@ static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr,
char *buf, loff_t off, size_t count)
{
+ size_t scpdata_len = count;
size_t padding;
- size_t scpdata_len;
-
- if (off < 0)
- return -EINVAL;
- if (off >= DIAG308_SCPDATA_SIZE)
- return -ENOSPC;
- if (count > DIAG308_SCPDATA_SIZE - off)
- count = DIAG308_SCPDATA_SIZE - off;
-
- memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf + off, count);
- scpdata_len = off + count;
+ if (off)
+ return -EINVAL;
+ memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf, count);
if (scpdata_len % 8) {
padding = 8 - (scpdata_len % 8);
memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len,
@@ -717,7 +738,7 @@ static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
}
static struct bin_attribute sys_reipl_fcp_scp_data_attr =
__BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_fcp_scpdata_read,
- reipl_fcp_scpdata_write, PAGE_SIZE);
+ reipl_fcp_scpdata_write, DIAG308_SCPDATA_SIZE);
static struct bin_attribute *reipl_fcp_bin_attrs[] = {
&sys_reipl_fcp_scp_data_attr,
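Sizing the bin attribute to DIAG308_SCPDATA_SIZE moves the length bound into the VFS, so the handler only has to refuse non-zero offsets and pad the tail to the 8-byte multiple the firmware interface expects. The padding arithmetic:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char scp_data[16] = { 0 };
            size_t count = 13, padding = 0;

            memset(scp_data, 0xab, count);  /* stand-in for the user copy */
            if (count % 8) {
                    padding = 8 - (count % 8);
                    memset(scp_data + count, 0, padding);
            }
            printf("len=%zu padded to %zu\n", count, count + padding);  /* 16 */
            return 0;
    }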
@@ -814,9 +835,7 @@ static struct attribute_group reipl_fcp_attr_group = {
};
/* CCW reipl device attributes */
-
-DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
- reipl_block_ccw->ipl_info.ccw.devno);
+DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ipl_info.ccw);
/* NSS wrapper */
static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
@@ -1056,8 +1075,8 @@ static void __reipl_run(void *unused)
switch (reipl_method) {
case REIPL_METHOD_CCW_CIO:
+ devid.ssid = reipl_block_ccw->ipl_info.ccw.ssid;
devid.devno = reipl_block_ccw->ipl_info.ccw.devno;
- devid.ssid = 0;
reipl_ccw_dev(&devid);
break;
case REIPL_METHOD_CCW_VM:
@@ -1192,6 +1211,7 @@ static int __init reipl_ccw_init(void)
reipl_block_ccw_init(reipl_block_ccw);
if (ipl_info.type == IPL_TYPE_CCW) {
+ reipl_block_ccw->ipl_info.ccw.ssid = ipl_ssid;
reipl_block_ccw->ipl_info.ccw.devno = ipl_devno;
reipl_block_ccw_fill_parms(reipl_block_ccw);
}
@@ -1336,9 +1356,7 @@ static struct attribute_group dump_fcp_attr_group = {
};
/* CCW dump device attributes */
-
-DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
- dump_block_ccw->ipl_info.ccw.devno);
+DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ipl_info.ccw);
static struct attribute *dump_ccw_attrs[] = {
&sys_dump_ccw_device_attr.attr,
@@ -1418,8 +1436,8 @@ static void __dump_run(void *unused)
switch (dump_method) {
case DUMP_METHOD_CCW_CIO:
+ devid.ssid = dump_block_ccw->ipl_info.ccw.ssid;
devid.devno = dump_block_ccw->ipl_info.ccw.devno;
- devid.ssid = 0;
reipl_ccw_dev(&devid);
break;
case DUMP_METHOD_CCW_VM:
@@ -1939,14 +1957,14 @@ void __init setup_ipl(void)
ipl_info.type = get_ipl_type();
switch (ipl_info.type) {
case IPL_TYPE_CCW:
+ ipl_info.data.ccw.dev_id.ssid = ipl_ssid;
ipl_info.data.ccw.dev_id.devno = ipl_devno;
- ipl_info.data.ccw.dev_id.ssid = 0;
break;
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
+ ipl_info.data.fcp.dev_id.ssid = 0;
ipl_info.data.fcp.dev_id.devno =
IPL_PARMBLOCK_START->ipl_info.fcp.devno;
- ipl_info.data.fcp.dev_id.ssid = 0;
ipl_info.data.fcp.wwpn = IPL_PARMBLOCK_START->ipl_info.fcp.wwpn;
ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun;
break;
@@ -1978,6 +1996,7 @@ void __init ipl_save_parameters(void)
if (cio_get_iplinfo(&iplinfo))
return;
+ ipl_ssid = iplinfo.ssid;
ipl_devno = iplinfo.devno;
ipl_flags |= IPL_DEVNO_VALID;
if (!iplinfo.is_qdio)
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 688a3aad9c79..114ee8b96f17 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -243,11 +243,7 @@ unsigned long arch_align_stack(unsigned long sp)
static inline unsigned long brk_rnd(void)
{
- /* 8MB for 32bit, 1GB for 64bit */
- if (is_32bit_task())
- return (get_random_int() & 0x7ffUL) << PAGE_SHIFT;
- else
- return (get_random_int() & 0x3ffffUL) << PAGE_SHIFT;
+ return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT;
}
unsigned long arch_randomize_brk(struct mm_struct *mm)
diff --git a/arch/s390/kernel/sclp.c b/arch/s390/kernel/sclp.c
index fa0bdff1d413..9fe7781a45cd 100644
--- a/arch/s390/kernel/sclp.c
+++ b/arch/s390/kernel/sclp.c
@@ -21,7 +21,7 @@ static void _sclp_wait_int(void)
__ctl_load(cr0_new, 0, 0);
psw_ext_save = S390_lowcore.external_new_psw;
- psw_mask = __extract_psw() & (PSW_MASK_EA | PSW_MASK_BA);
+ psw_mask = __extract_psw();
S390_lowcore.external_new_psw.mask = psw_mask;
psw_wait.mask = psw_mask | PSW_MASK_EXT | PSW_MASK_WAIT;
S390_lowcore.ext_int_code = 0;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index ce0cbd6ba7ca..c837bcacf218 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -764,9 +764,6 @@ static int __init setup_hwcaps(void)
get_cpu_id(&cpu_id);
add_device_randomness(&cpu_id, sizeof(cpu_id));
switch (cpu_id.machine) {
- case 0x9672:
- strcpy(elf_platform, "g5");
- break;
case 0x2064:
case 0x2066:
default: /* Use "z900" as default for 64 bit kernels. */
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 8c56929c8d82..5378c3ea1b98 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -382,3 +382,4 @@ SYSCALL(sys_sendmsg,compat_sys_sendmsg) /* 370 */
SYSCALL(sys_recvfrom,compat_sys_recvfrom)
SYSCALL(sys_recvmsg,compat_sys_recvmsg)
SYSCALL(sys_shutdown,sys_shutdown)
+SYSCALL(sys_mlock2,compat_sys_mlock2)
diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c
index 73239bb576c4..21a5df99552b 100644
--- a/arch/s390/kernel/trace.c
+++ b/arch/s390/kernel/trace.c
@@ -9,11 +9,11 @@
#define CREATE_TRACE_POINTS
#include <asm/trace/diag.h>
-EXPORT_TRACEPOINT_SYMBOL(diagnose);
+EXPORT_TRACEPOINT_SYMBOL(s390_diagnose);
static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth);
-void trace_diagnose_norecursion(int diag_nr)
+void trace_s390_diagnose_norecursion(int diag_nr)
{
unsigned long flags;
unsigned int *depth;
@@ -22,7 +22,7 @@ void trace_diagnose_norecursion(int diag_nr)
depth = this_cpu_ptr(&diagnose_trace_depth);
if (*depth == 0) {
(*depth)++;
- trace_diagnose(diag_nr);
+ trace_s390_diagnose(diag_nr);
(*depth)--;
}
local_irq_restore(flags);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 373e32346d68..6a75352f453c 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1030,8 +1030,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
src_id, 0);
/* sending vcpu invalid */
- if (src_id >= KVM_MAX_VCPUS ||
- kvm_get_vcpu(vcpu->kvm, src_id) == NULL)
+ if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
return -EINVAL;
if (sclp.has_sigpif)
@@ -1110,6 +1109,10 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
irq->u.emerg.code, 0);
+ /* sending vcpu invalid */
+ if (kvm_get_vcpu_by_id(vcpu->kvm, irq->u.emerg.code) == NULL)
+ return -EINVAL;
+
set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8fe2f1c722dc..846589281b04 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -342,12 +342,16 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
r = 0;
break;
case KVM_CAP_S390_VECTOR_REGISTERS:
- if (MACHINE_HAS_VX) {
+ mutex_lock(&kvm->lock);
+ if (atomic_read(&kvm->online_vcpus)) {
+ r = -EBUSY;
+ } else if (MACHINE_HAS_VX) {
set_kvm_facility(kvm->arch.model.fac->mask, 129);
set_kvm_facility(kvm->arch.model.fac->list, 129);
r = 0;
} else
r = -EINVAL;
+ mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
r ? "(not available)" : "(success)");
break;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 77191b85ea7a..d76b51cb4b62 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -660,7 +660,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
- if (!MACHINE_HAS_PFMF)
+ if (!test_kvm_facility(vcpu->kvm, 8))
return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index da690b69f9fe..77c22d685c7a 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -291,12 +291,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
u16 cpu_addr, u32 parameter, u64 *status_reg)
{
int rc;
- struct kvm_vcpu *dst_vcpu;
+ struct kvm_vcpu *dst_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
- if (cpu_addr >= KVM_MAX_VCPUS)
- return SIGP_CC_NOT_OPERATIONAL;
-
- dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
@@ -478,7 +474,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
if (order_code == SIGP_EXTERNAL_CALL) {
- dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
BUG_ON(dest_vcpu == NULL);
kvm_s390_vcpu_wakeup(dest_vcpu);
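kvm_get_vcpu() indexes the vcpu array in creation order, while SIGP destination addresses are vcpu ids; the two diverge once ids are sparse, so a bounds check against KVM_MAX_VCPUS was never the right validity test. A sketch of the by-id lookup that replaces it:

    #include <stddef.h>
    #include <stdio.h>

    struct vcpu { int id; };

    static struct vcpu vcpus[] = { { .id = 0 }, { .id = 4 } };  /* sparse ids */

    static struct vcpu *get_vcpu_by_id(int id)
    {
            for (size_t i = 0; i < sizeof(vcpus) / sizeof(vcpus[0]); i++)
                    if (vcpus[i].id == id)
                            return &vcpus[i];
            return NULL;  /* not operational */
    }

    int main(void)
    {
            printf("id 4 -> %s\n", get_vcpu_by_id(4) ? "found" : "missing");
            printf("id 1 -> %s\n", get_vcpu_by_id(1) ? "found" : "missing");
            return 0;
    }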
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index c3c07d3505ba..c722400c7697 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -48,37 +48,13 @@ EXPORT_SYMBOL(zero_page_mask);
static void __init setup_zero_pages(void)
{
- struct cpuid cpu_id;
unsigned int order;
struct page *page;
int i;
- get_cpu_id(&cpu_id);
- switch (cpu_id.machine) {
- case 0x9672: /* g5 */
- case 0x2064: /* z900 */
- case 0x2066: /* z900 */
- case 0x2084: /* z990 */
- case 0x2086: /* z990 */
- case 0x2094: /* z9-109 */
- case 0x2096: /* z9-109 */
- order = 0;
- break;
- case 0x2097: /* z10 */
- case 0x2098: /* z10 */
- case 0x2817: /* z196 */
- case 0x2818: /* z196 */
- order = 2;
- break;
- case 0x2827: /* zEC12 */
- case 0x2828: /* zEC12 */
- order = 5;
- break;
- case 0x2964: /* z13 */
- default:
- order = 7;
- break;
- }
+ /* Latest machines require a mapping granularity of 512KB */
+ order = 7;
+
/* Limit number of empty zero pages for small memory sizes */
while (order > 2 && (totalram_pages >> 10) < (1UL << order))
order--;
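An order-7 allocation is 128 contiguous 4KB pages, i.e. one 512KB block of empty zero pages matching the new cache-alignment granularity; the retained clamp still shrinks it on small systems, where totalram_pages >> 10 is memory in 4MB units. Checked numerically:

    #include <stdio.h>

    int main(void)
    {
            unsigned int order = 7;
            unsigned long totalram_pages = 4096;  /* 16MB of 4KB pages */

            printf("zero area: %lu KB\n", (1UL << order) * 4);  /* 512 */
            while (order > 2 && (totalram_pages >> 10) < (1UL << order))
                    order--;
            printf("clamped order: %u\n", order);  /* 2 on this tiny box */
            return 0;
    }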
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 6e552af08c76..ea01477b4aa6 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -31,9 +31,6 @@
#include <linux/security.h>
#include <asm/pgalloc.h>
-unsigned long mmap_rnd_mask;
-static unsigned long mmap_align_mask;
-
static unsigned long stack_maxrandom_size(void)
{
if (!(current->flags & PF_RANDOMIZE))
@@ -62,10 +59,7 @@ static inline int mmap_is_legacy(void)
unsigned long arch_mmap_rnd(void)
{
- if (is_32bit_task())
- return (get_random_int() & 0x7ff) << PAGE_SHIFT;
- else
- return (get_random_int() & mmap_rnd_mask) << PAGE_SHIFT;
+ return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT;
}
static unsigned long mmap_base_legacy(unsigned long rnd)
@@ -92,7 +86,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct vm_unmapped_area_info info;
- int do_color_align;
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
@@ -108,15 +101,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
return addr;
}
- do_color_align = 0;
- if (filp || (flags & MAP_SHARED))
- do_color_align = !is_32bit_task();
-
info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
- info.align_mask = do_color_align ? (mmap_align_mask << PAGE_SHIFT) : 0;
+ if (filp || (flags & MAP_SHARED))
+ info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+ else
+ info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
return vm_unmapped_area(&info);
}
@@ -130,7 +122,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
- int do_color_align;
/* requested length too big for entire address space */
if (len > TASK_SIZE - mmap_min_addr)
@@ -148,15 +139,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
return addr;
}
- do_color_align = 0;
- if (filp || (flags & MAP_SHARED))
- do_color_align = !is_32bit_task();
-
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
info.high_limit = mm->mmap_base;
- info.align_mask = do_color_align ? (mmap_align_mask << PAGE_SHIFT) : 0;
+ if (filp || (flags & MAP_SHARED))
+ info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+ else
+ info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
@@ -254,35 +244,3 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
mm->get_unmapped_area = s390_get_unmapped_area_topdown;
}
}
-
-static int __init setup_mmap_rnd(void)
-{
- struct cpuid cpu_id;
-
- get_cpu_id(&cpu_id);
- switch (cpu_id.machine) {
- case 0x9672:
- case 0x2064:
- case 0x2066:
- case 0x2084:
- case 0x2086:
- case 0x2094:
- case 0x2096:
- case 0x2097:
- case 0x2098:
- case 0x2817:
- case 0x2818:
- case 0x2827:
- case 0x2828:
- mmap_rnd_mask = 0x7ffUL;
- mmap_align_mask = 0UL;
- break;
- case 0x2964: /* z13 */
- default:
- mmap_rnd_mask = 0x3ff80UL;
- mmap_align_mask = 0x7fUL;
- break;
- }
- return 0;
-}
-early_initcall(setup_mmap_rnd);
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 37d10f74425a..d348f2c09a1e 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -33,7 +33,7 @@ unsigned long *dma_alloc_cpu_table(void)
return NULL;
for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
- *entry = ZPCI_TABLE_INVALID | ZPCI_TABLE_PROTECTED;
+ *entry = ZPCI_TABLE_INVALID;
return table;
}
@@ -51,7 +51,7 @@ static unsigned long *dma_alloc_page_table(void)
return NULL;
for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
- *entry = ZPCI_PTE_INVALID | ZPCI_TABLE_PROTECTED;
+ *entry = ZPCI_PTE_INVALID;
return table;
}
@@ -95,7 +95,7 @@ static unsigned long *dma_get_page_table_origin(unsigned long *entry)
return pto;
}
-static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
{
unsigned long *sto, *pto;
unsigned int rtx, sx, px;
@@ -114,20 +114,10 @@ static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr
return &pto[px];
}
-void dma_update_cpu_trans(unsigned long *dma_table, void *page_addr,
- dma_addr_t dma_addr, int flags)
+void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
{
- unsigned long *entry;
-
- entry = dma_walk_cpu_trans(dma_table, dma_addr);
- if (!entry) {
- WARN_ON_ONCE(1);
- return;
- }
-
if (flags & ZPCI_PTE_INVALID) {
invalidate_pt_entry(entry);
- return;
} else {
set_pt_pfaa(entry, page_addr);
validate_pt_entry(entry);
@@ -146,18 +136,25 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
u8 *page_addr = (u8 *) (pa & PAGE_MASK);
dma_addr_t start_dma_addr = dma_addr;
unsigned long irq_flags;
+ unsigned long *entry;
int i, rc = 0;
if (!nr_pages)
return -EINVAL;
spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
- if (!zdev->dma_table)
+ if (!zdev->dma_table) {
+ rc = -EINVAL;
goto no_refresh;
+ }
for (i = 0; i < nr_pages; i++) {
- dma_update_cpu_trans(zdev->dma_table, page_addr, dma_addr,
- flags);
+ entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+ if (!entry) {
+ rc = -ENOMEM;
+ goto undo_cpu_trans;
+ }
+ dma_update_cpu_trans(entry, page_addr, flags);
page_addr += PAGE_SIZE;
dma_addr += PAGE_SIZE;
}
@@ -176,6 +173,18 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
nr_pages * PAGE_SIZE);
+undo_cpu_trans:
+ if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
+ flags = ZPCI_PTE_INVALID;
+ while (i-- > 0) {
+ page_addr -= PAGE_SIZE;
+ dma_addr -= PAGE_SIZE;
+ entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+ if (!entry)
+ break;
+ dma_update_cpu_trans(entry, page_addr, flags);
+ }
+ }
no_refresh:
spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
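Splitting dma_walk_cpu_trans() out of the update is what makes this error path possible: a failure at page i unwinds pages [0, i) in reverse and invalidates them, so a failed map never leaves stale valid translation entries behind. The unwind idiom, sketched:

    #include <stdio.h>

    #define NR_PAGES 4

    static int map_one(int i)    { return i == 2 ? -1 : 0; }  /* fail at 2 */
    static void unmap_one(int i) { printf("undo page %d\n", i); }

    int main(void)
    {
            int i, rc = 0;

            for (i = 0; i < NR_PAGES; i++) {
                    rc = map_one(i);
                    if (rc)
                            break;
            }
            if (rc)
                    while (i-- > 0)  /* same shape as the undo_cpu_trans loop */
                            unmap_one(i);
            return rc ? 1 : 0;
    }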
@@ -260,6 +269,16 @@ out:
spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
}
+static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
+{
+ struct {
+ unsigned long rc;
+ unsigned long addr;
+ } __packed data = {rc, addr};
+
+ zpci_err_hex(&data, sizeof(data));
+}
+
static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction direction,
@@ -270,33 +289,40 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
unsigned long pa = page_to_phys(page) + offset;
int flags = ZPCI_PTE_VALID;
dma_addr_t dma_addr;
+ int ret;
/* This rounds up number of pages based on size and offset */
nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
iommu_page_index = dma_alloc_iommu(zdev, nr_pages);
- if (iommu_page_index == -1)
+ if (iommu_page_index == -1) {
+ ret = -ENOSPC;
goto out_err;
+ }
/* Use rounded up size */
size = nr_pages * PAGE_SIZE;
dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
- if (dma_addr + size > zdev->end_dma)
+ if (dma_addr + size > zdev->end_dma) {
+ ret = -ERANGE;
goto out_free;
+ }
if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
flags |= ZPCI_TABLE_PROTECTED;
- if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) {
- atomic64_add(nr_pages, &zdev->mapped_pages);
- return dma_addr + (offset & ~PAGE_MASK);
- }
+ ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
+ if (ret)
+ goto out_free;
+
+ atomic64_add(nr_pages, &zdev->mapped_pages);
+ return dma_addr + (offset & ~PAGE_MASK);
out_free:
dma_free_iommu(zdev, iommu_page_index, nr_pages);
out_err:
zpci_err("map error:\n");
- zpci_err_hex(&pa, sizeof(pa));
+ zpci_err_dma(ret, pa);
return DMA_ERROR_CODE;
}
@@ -306,14 +332,16 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
{
struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
unsigned long iommu_page_index;
- int npages;
+ int npages, ret;
npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
dma_addr = dma_addr & PAGE_MASK;
- if (dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
- ZPCI_TABLE_PROTECTED | ZPCI_PTE_INVALID)) {
+ ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
+ ZPCI_PTE_INVALID);
+ if (ret) {
zpci_err("unmap error:\n");
- zpci_err_hex(&dma_addr, sizeof(dma_addr));
+ zpci_err_dma(ret, dma_addr);
+ return;
}
atomic64_add(npages, &zdev->unmapped_pages);
diff --git a/arch/sh/include/uapi/asm/unistd_64.h b/arch/sh/include/uapi/asm/unistd_64.h
index e6820c86e8c7..47ebd5b5ed55 100644
--- a/arch/sh/include/uapi/asm/unistd_64.h
+++ b/arch/sh/include/uapi/asm/unistd_64.h
@@ -278,7 +278,7 @@
#define __NR_fsetxattr 256
#define __NR_getxattr 257
#define __NR_lgetxattr 258
-#define __NR_fgetxattr 269
+#define __NR_fgetxattr 259
#define __NR_listxattr 260
#define __NR_llistxattr 261
#define __NR_flistxattr 262
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 7cfd7f153966..4dca18347ee9 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -10,7 +10,7 @@
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2009 Jaswinder Singh Rajput
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
* Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
*
* ppc:
diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h
index 370ca1e71ffb..93310837c2df 100644
--- a/arch/sparc/include/asm/elf_64.h
+++ b/arch/sparc/include/asm/elf_64.h
@@ -95,6 +95,7 @@
* really available. So we simply advertise only "crypto" support.
*/
#define HWCAP_SPARC_CRYPTO 0x04000000 /* CRYPTO insns available */
+#define HWCAP_SPARC_ADI 0x08000000 /* ADI available */
#define CORE_DUMP_USE_REGSET
diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h
index efe9479f837b..1c26d440d288 100644
--- a/arch/sparc/include/uapi/asm/unistd.h
+++ b/arch/sparc/include/uapi/asm/unistd.h
@@ -417,8 +417,13 @@
#define __NR_bpf 349
#define __NR_execveat 350
#define __NR_membarrier 351
+#define __NR_userfaultfd 352
+#define __NR_bind 353
+#define __NR_listen 354
+#define __NR_setsockopt 355
+#define __NR_mlock2 356
-#define NR_syscalls 352
+#define NR_syscalls 357
/* Bitmask values returned from kern_features system call. */
#define KERN_FEATURE_MIXED_MODE_STACK 0x00000001
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 3d61fcae7ee3..f2d30cab5b3f 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -946,6 +946,12 @@ ENTRY(__retl_one)
mov 1, %o0
ENDPROC(__retl_one)
+ENTRY(__retl_one_fp)
+ VISExitHalf
+ retl
+ mov 1, %o0
+ENDPROC(__retl_one_fp)
+
ENTRY(__ret_one_asi)
wr %g0, ASI_AIUS, %asi
ret
@@ -958,6 +964,13 @@ ENTRY(__retl_one_asi)
mov 1, %o0
ENDPROC(__retl_one_asi)
+ENTRY(__retl_one_asi_fp)
+ wr %g0, ASI_AIUS, %asi
+ VISExitHalf
+ retl
+ mov 1, %o0
+ENDPROC(__retl_one_asi_fp)
+
ENTRY(__retl_o1)
retl
mov %o1, %o0
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index b0da5aedb336..6596f66ce112 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -9,7 +9,7 @@
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2009 Jaswinder Singh Rajput
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
*/
#include <linux/perf_event.h>
@@ -1828,11 +1828,18 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
+ u64 saved_fault_address = current_thread_info()->fault_address;
+ u8 saved_fault_code = get_thread_fault_code();
+ mm_segment_t old_fs;
+
perf_callchain_store(entry, regs->tpc);
if (!current->mm)
return;
+ old_fs = get_fs();
+ set_fs(USER_DS);
+
flushw_user();
pagefault_disable();
@@ -1843,4 +1850,8 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
perf_callchain_user_64(entry, regs);
pagefault_enable();
+
+ set_fs(old_fs);
+ set_thread_fault_code(saved_fault_code);
+ current_thread_info()->fault_address = saved_fault_address;
}
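The user stack walk can fault, and those faults reuse the same per-thread fault_address/fault_code slots as any fault already in flight when the perf interrupt hit; hence the set_fs(USER_DS) window plus the save/restore bracket. The bracket shape, sketched:

    #include <stdio.h>

    struct thread_info_ {
            unsigned long fault_address;
            unsigned char fault_code;
    };

    static struct thread_info_ ti = { 0xdead0000UL, 7 };

    static void walk_user_stack(void)
    {
            ti.fault_address = 0;  /* a nested fault clobbers the slots */
            ti.fault_code = 0;
    }

    int main(void)
    {
            unsigned long saved_addr = ti.fault_address;
            unsigned char saved_code = ti.fault_code;

            walk_user_stack();
            ti.fault_code = saved_code;
            ti.fault_address = saved_addr;
            printf("restored %#lx/%u\n", ti.fault_address,
                   (unsigned)ti.fault_code);
            return 0;
    }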
diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
index 39f0c662f4c8..d08bdaffdbfc 100644
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -73,7 +73,13 @@ rtrap_nmi: ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
andn %l1, %l4, %l1
srl %l4, 20, %l4
ba,pt %xcc, rtrap_no_irq_enable
- wrpr %l4, %pil
+ nop
+ /* Do not actually set the %pil here. We will do that
+ * below after we clear PSTATE_IE in the %pstate register.
+ * If we re-enable interrupts here, we can recurse down
+ * the hardirq stack potentially endlessly, causing a
+ * stack overflow.
+ */
.align 64
.globl rtrap_irq, rtrap, irqsz_patchme, rtrap_xcall
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index f7b261749383..f3185e2b028b 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -380,7 +380,8 @@ static const char *hwcaps[] = {
*/
"mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2",
"ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau",
- "ima", "cspare", "pause", "cbcond",
+ "ima", "cspare", "pause", "cbcond", NULL /*reserved for crypto */,
+ "adp",
};
static const char *crypto_hwcaps[] = {
@@ -396,7 +397,7 @@ void cpucap_info(struct seq_file *m)
seq_puts(m, "cpucaps\t\t: ");
for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
unsigned long bit = 1UL << i;
- if (caps & bit) {
+ if (hwcaps[i] && (caps & bit)) {
seq_printf(m, "%s%s",
printed ? "," : "", hwcaps[i]);
printed++;
@@ -450,7 +451,7 @@ static void __init report_hwcaps(unsigned long caps)
for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
unsigned long bit = 1UL << i;
- if (caps & bit)
+ if (hwcaps[i] && (caps & bit))
report_one_hwcap(&printed, hwcaps[i]);
}
if (caps & HWCAP_SPARC_CRYPTO)
@@ -485,7 +486,7 @@ static unsigned long __init mdesc_cpu_hwcap_list(void)
for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
unsigned long bit = 1UL << i;
- if (!strcmp(prop, hwcaps[i])) {
+ if (hwcaps[i] && !strcmp(prop, hwcaps[i])) {
caps |= bit;
break;
}
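One bit of the caps word is reported through the separate crypto_hwcaps[] table, so its slot in hwcaps[] becomes an explicit NULL placeholder and every scan over the table gains an hwcaps[i] guard before printing or strcmp(). The guarded-scan shape:

    #include <stdio.h>

    static const char *hwcaps_[] = {
            "mul32", "div32", NULL /* reserved */, "adp",
    };

    int main(void)
    {
            unsigned long caps = 0xf;  /* all four bits, incl. the reserved one */

            for (unsigned int i = 0; i < sizeof(hwcaps_) / sizeof(hwcaps_[0]); i++)
                    if (hwcaps_[i] && (caps & (1UL << i)))
                            printf("%s\n", hwcaps_[i]);
            return 0;  /* the reserved slot is silently skipped */
    }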
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index cc23b62b6e38..e663b6c78de2 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -35,18 +35,18 @@ sys_call_table:
/*80*/ .long sys_setgroups16, sys_getpgrp, sys_setgroups, sys_setitimer, sys_ftruncate64
/*85*/ .long sys_swapon, sys_getitimer, sys_setuid, sys_sethostname, sys_setgid
/*90*/ .long sys_dup2, sys_setfsuid, sys_fcntl, sys_select, sys_setfsgid
-/*95*/ .long sys_fsync, sys_setpriority, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
+/*95*/ .long sys_fsync, sys_setpriority, sys_socket, sys_connect, sys_accept
/*100*/ .long sys_getpriority, sys_rt_sigreturn, sys_rt_sigaction, sys_rt_sigprocmask, sys_rt_sigpending
/*105*/ .long sys_rt_sigtimedwait, sys_rt_sigqueueinfo, sys_rt_sigsuspend, sys_setresuid, sys_getresuid
-/*110*/ .long sys_setresgid, sys_getresgid, sys_setregid, sys_nis_syscall, sys_nis_syscall
-/*115*/ .long sys_getgroups, sys_gettimeofday, sys_getrusage, sys_nis_syscall, sys_getcwd
+/*110*/ .long sys_setresgid, sys_getresgid, sys_setregid, sys_recvmsg, sys_sendmsg
+/*115*/ .long sys_getgroups, sys_gettimeofday, sys_getrusage, sys_getsockopt, sys_getcwd
/*120*/ .long sys_readv, sys_writev, sys_settimeofday, sys_fchown16, sys_fchmod
-/*125*/ .long sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, sys_truncate
-/*130*/ .long sys_ftruncate, sys_flock, sys_lstat64, sys_nis_syscall, sys_nis_syscall
-/*135*/ .long sys_nis_syscall, sys_mkdir, sys_rmdir, sys_utimes, sys_stat64
-/*140*/ .long sys_sendfile64, sys_nis_syscall, sys_futex, sys_gettid, sys_getrlimit
+/*125*/ .long sys_recvfrom, sys_setreuid16, sys_setregid16, sys_rename, sys_truncate
+/*130*/ .long sys_ftruncate, sys_flock, sys_lstat64, sys_sendto, sys_shutdown
+/*135*/ .long sys_socketpair, sys_mkdir, sys_rmdir, sys_utimes, sys_stat64
+/*140*/ .long sys_sendfile64, sys_getpeername, sys_futex, sys_gettid, sys_getrlimit
/*145*/ .long sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write
-/*150*/ .long sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
+/*150*/ .long sys_getsockname, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
/*155*/ .long sys_fcntl64, sys_inotify_rm_watch, sys_statfs, sys_fstatfs, sys_oldumount
/*160*/ .long sys_sched_setaffinity, sys_sched_getaffinity, sys_getdomainname, sys_setdomainname, sys_nis_syscall
/*165*/ .long sys_quotactl, sys_set_tid_address, sys_mount, sys_ustat, sys_setxattr
@@ -87,4 +87,5 @@ sys_call_table:
/*335*/ .long sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev
/*340*/ .long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
/*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
-/*350*/ .long sys_execveat, sys_membarrier
+/*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
+/*355*/ .long sys_setsockopt, sys_mlock2
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index f229468a7479..1557121f4cdc 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -37,15 +37,15 @@ sys_call_table32:
/*80*/ .word sys_setgroups16, sys_getpgrp, sys_setgroups, compat_sys_setitimer, sys32_ftruncate64
.word sys_swapon, compat_sys_getitimer, sys_setuid, sys_sethostname, sys_setgid
/*90*/ .word sys_dup2, sys_setfsuid, compat_sys_fcntl, sys32_select, sys_setfsgid
- .word sys_fsync, sys_setpriority, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
+ .word sys_fsync, sys_setpriority, sys_socket, sys_connect, sys_accept
/*100*/ .word sys_getpriority, sys32_rt_sigreturn, compat_sys_rt_sigaction, compat_sys_rt_sigprocmask, compat_sys_rt_sigpending
.word compat_sys_rt_sigtimedwait, compat_sys_rt_sigqueueinfo, compat_sys_rt_sigsuspend, sys_setresuid, sys_getresuid
-/*110*/ .word sys_setresgid, sys_getresgid, sys_setregid, sys_nis_syscall, sys_nis_syscall
- .word sys_getgroups, compat_sys_gettimeofday, compat_sys_getrusage, sys_nis_syscall, sys_getcwd
+/*110*/ .word sys_setresgid, sys_getresgid, sys_setregid, compat_sys_recvmsg, compat_sys_sendmsg
+ .word sys_getgroups, compat_sys_gettimeofday, compat_sys_getrusage, compat_sys_getsockopt, sys_getcwd
/*120*/ .word compat_sys_readv, compat_sys_writev, compat_sys_settimeofday, sys_fchown16, sys_fchmod
- .word sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate
-/*130*/ .word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_nis_syscall, sys_nis_syscall
- .word sys_nis_syscall, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64
+ .word sys_recvfrom, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate
+/*130*/ .word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_sendto, sys_shutdown
+ .word sys_socketpair, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64
/*140*/ .word sys_sendfile64, sys_nis_syscall, sys32_futex, sys_gettid, compat_sys_getrlimit
.word compat_sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write
/*150*/ .word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
@@ -88,7 +88,8 @@ sys_call_table32:
.word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev
/*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
.word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
-/*350*/ .word sys32_execveat, sys_membarrier
+/*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
+ .word compat_sys_setsockopt, sys_mlock2
#endif /* CONFIG_COMPAT */
@@ -168,4 +169,5 @@ sys_call_table:
.word sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev
/*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
-/*350*/ .word sys64_execveat, sys_membarrier
+/*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
+ .word sys_setsockopt, sys_mlock2
diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S
index 119ccb9a54f4..d5242b8c4f94 100644
--- a/arch/sparc/lib/NG2copy_from_user.S
+++ b/arch/sparc/lib/NG2copy_from_user.S
@@ -11,6 +11,14 @@
.text; \
.align 4;
+#define EX_LD_FP(x) \
+98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, __retl_one_asi_fp;\
+ .text; \
+ .align 4;
+
#ifndef ASI_AIUS
#define ASI_AIUS 0x11
#endif
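Each EX_LD_FP()/EX_ST_FP() use plants the guarded instruction at a local label and records a (fault PC, fixup PC) pair in __ex_table; the _fp fixups added in head_64.S run VISExitHalf first so a fault mid-copy restores the FPU state before the error return. A C sketch of the table-lookup idea (the addresses are made up):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct exception_table_entry { uintptr_t insn, fixup; };

    static const struct exception_table_entry ex_table[] = {
            { 0x1000, 0x9000 },  /* EX_LD:    plain fixup               */
            { 0x1008, 0x9100 },  /* EX_LD_FP: fixup that also exits VIS */
    };

    static uintptr_t search_exception_table(uintptr_t pc)
    {
            for (size_t i = 0; i < sizeof(ex_table) / sizeof(ex_table[0]); i++)
                    if (ex_table[i].insn == pc)
                            return ex_table[i].fixup;
            return 0;
    }

    int main(void)
    {
            printf("fault at 0x1008 -> fixup %#lx\n",
                   (unsigned long)search_exception_table(0x1008));
            return 0;
    }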
diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S
index 7fe1ccefd9d0..4e962d993b10 100644
--- a/arch/sparc/lib/NG2copy_to_user.S
+++ b/arch/sparc/lib/NG2copy_to_user.S
@@ -11,6 +11,14 @@
.text; \
.align 4;
+#define EX_ST_FP(x) \
+98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, __retl_one_asi_fp;\
+ .text; \
+ .align 4;
+
#ifndef ASI_AIUS
#define ASI_AIUS 0x11
#endif
diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
index 30eee6e8a81b..d5f585df2f3f 100644
--- a/arch/sparc/lib/NG2memcpy.S
+++ b/arch/sparc/lib/NG2memcpy.S
@@ -34,10 +34,16 @@
#ifndef EX_LD
#define EX_LD(x) x
#endif
+#ifndef EX_LD_FP
+#define EX_LD_FP(x) x
+#endif
#ifndef EX_ST
#define EX_ST(x) x
#endif
+#ifndef EX_ST_FP
+#define EX_ST_FP(x) x
+#endif
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
@@ -134,40 +140,40 @@
fsrc2 %x6, %f12; \
fsrc2 %x7, %f14;
#define FREG_LOAD_1(base, x0) \
- EX_LD(LOAD(ldd, base + 0x00, %x0))
+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0))
#define FREG_LOAD_2(base, x0, x1) \
- EX_LD(LOAD(ldd, base + 0x00, %x0)); \
- EX_LD(LOAD(ldd, base + 0x08, %x1));
+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1));
#define FREG_LOAD_3(base, x0, x1, x2) \
- EX_LD(LOAD(ldd, base + 0x00, %x0)); \
- EX_LD(LOAD(ldd, base + 0x08, %x1)); \
- EX_LD(LOAD(ldd, base + 0x10, %x2));
+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+ EX_LD_FP(LOAD(ldd, base + 0x10, %x2));
#define FREG_LOAD_4(base, x0, x1, x2, x3) \
- EX_LD(LOAD(ldd, base + 0x00, %x0)); \
- EX_LD(LOAD(ldd, base + 0x08, %x1)); \
- EX_LD(LOAD(ldd, base + 0x10, %x2)); \
- EX_LD(LOAD(ldd, base + 0x18, %x3));
+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+ EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+ EX_LD_FP(LOAD(ldd, base + 0x18, %x3));
#define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
- EX_LD(LOAD(ldd, base + 0x00, %x0)); \
- EX_LD(LOAD(ldd, base + 0x08, %x1)); \
- EX_LD(LOAD(ldd, base + 0x10, %x2)); \
- EX_LD(LOAD(ldd, base + 0x18, %x3)); \
- EX_LD(LOAD(ldd, base + 0x20, %x4));
+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+ EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+ EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
+ EX_LD_FP(LOAD(ldd, base + 0x20, %x4));
#define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
- EX_LD(LOAD(ldd, base + 0x00, %x0)); \
- EX_LD(LOAD(ldd, base + 0x08, %x1)); \
- EX_LD(LOAD(ldd, base + 0x10, %x2)); \
- EX_LD(LOAD(ldd, base + 0x18, %x3)); \
- EX_LD(LOAD(ldd, base + 0x20, %x4)); \
- EX_LD(LOAD(ldd, base + 0x28, %x5));
+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+ EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+ EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
+ EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
+ EX_LD_FP(LOAD(ldd, base + 0x28, %x5));
#define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
- EX_LD(LOAD(ldd, base + 0x00, %x0)); \
- EX_LD(LOAD(ldd, base + 0x08, %x1)); \
- EX_LD(LOAD(ldd, base + 0x10, %x2)); \
- EX_LD(LOAD(ldd, base + 0x18, %x3)); \
- EX_LD(LOAD(ldd, base + 0x20, %x4)); \
- EX_LD(LOAD(ldd, base + 0x28, %x5)); \
- EX_LD(LOAD(ldd, base + 0x30, %x6));
+ EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
+ EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
+ EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
+ EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
+ EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
+ EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \
+ EX_LD_FP(LOAD(ldd, base + 0x30, %x6));
.register %g2,#scratch
.register %g3,#scratch
@@ -275,11 +281,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
nop
/* fall through for 0 < low bits < 8 */
110: sub %o4, 64, %g2
- EX_LD(LOAD_BLK(%g2, %f0))
-1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
- EX_LD(LOAD_BLK(%o4, %f16))
+ EX_LD_FP(LOAD_BLK(%g2, %f0))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD_FP(LOAD_BLK(%o4, %f16))
FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
- EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -290,10 +296,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
120: sub %o4, 56, %g2
FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
-1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
- EX_LD(LOAD_BLK(%o4, %f16))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD_FP(LOAD_BLK(%o4, %f16))
FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
- EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -304,10 +310,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
130: sub %o4, 48, %g2
FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
-1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
- EX_LD(LOAD_BLK(%o4, %f16))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD_FP(LOAD_BLK(%o4, %f16))
FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
- EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -318,10 +324,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
140: sub %o4, 40, %g2
FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
-1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
- EX_LD(LOAD_BLK(%o4, %f16))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD_FP(LOAD_BLK(%o4, %f16))
FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
- EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
FREG_MOVE_5(f22, f24, f26, f28, f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -332,10 +338,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
150: sub %o4, 32, %g2
FREG_LOAD_4(%g2, f0, f2, f4, f6)
-1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
- EX_LD(LOAD_BLK(%o4, %f16))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD_FP(LOAD_BLK(%o4, %f16))
FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
- EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
FREG_MOVE_4(f24, f26, f28, f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -346,10 +352,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
160: sub %o4, 24, %g2
FREG_LOAD_3(%g2, f0, f2, f4)
-1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
- EX_LD(LOAD_BLK(%o4, %f16))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD_FP(LOAD_BLK(%o4, %f16))
FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
- EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
FREG_MOVE_3(f26, f28, f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -360,10 +366,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
170: sub %o4, 16, %g2
FREG_LOAD_2(%g2, f0, f2)
-1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
- EX_LD(LOAD_BLK(%o4, %f16))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD_FP(LOAD_BLK(%o4, %f16))
FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
- EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
FREG_MOVE_2(f28, f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -374,10 +380,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
180: sub %o4, 8, %g2
FREG_LOAD_1(%g2, f0)
-1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
- EX_LD(LOAD_BLK(%o4, %f16))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+ EX_LD_FP(LOAD_BLK(%o4, %f16))
FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
- EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
FREG_MOVE_1(f30)
subcc %g1, 64, %g1
add %o4, 64, %o4
@@ -387,10 +393,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
nop
190:
-1: EX_ST(STORE_INIT(%g0, %o4 + %g3))
+1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
subcc %g1, 64, %g1
- EX_LD(LOAD_BLK(%o4, %f0))
- EX_ST(STORE_BLK(%f0, %o4 + %g3))
+ EX_LD_FP(LOAD_BLK(%o4, %f0))
+ EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
add %o4, 64, %o4
bne,pt %xcc, 1b
LOAD(prefetch, %o4 + 64, #one_read)
diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S
index fd9f903ffa32..2e8ee7ad07a9 100644
--- a/arch/sparc/lib/NG4copy_from_user.S
+++ b/arch/sparc/lib/NG4copy_from_user.S
@@ -11,6 +11,14 @@
.text; \
.align 4;
+#define EX_LD_FP(x) \
+98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, __retl_one_asi_fp;\
+ .text; \
+ .align 4;
+
#ifndef ASI_AIUS
#define ASI_AIUS 0x11
#endif
diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S
index 9744c4540a8d..be0bf4590df8 100644
--- a/arch/sparc/lib/NG4copy_to_user.S
+++ b/arch/sparc/lib/NG4copy_to_user.S
@@ -11,6 +11,14 @@
.text; \
.align 4;
+#define EX_ST_FP(x) \
+98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, __retl_one_asi_fp;\
+ .text; \
+ .align 4;
+
#ifndef ASI_AIUS
#define ASI_AIUS 0x11
#endif
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
index 83aeeb1dffdb..8e13ee1f4454 100644
--- a/arch/sparc/lib/NG4memcpy.S
+++ b/arch/sparc/lib/NG4memcpy.S
@@ -48,10 +48,16 @@
#ifndef EX_LD
#define EX_LD(x) x
#endif
+#ifndef EX_LD_FP
+#define EX_LD_FP(x) x
+#endif
#ifndef EX_ST
#define EX_ST(x) x
#endif
+#ifndef EX_ST_FP
+#define EX_ST_FP(x) x
+#endif
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
@@ -210,17 +216,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %o2, %o4, %o2
alignaddr %o1, %g0, %g1
add %o1, %o4, %o1
- EX_LD(LOAD(ldd, %g1 + 0x00, %f0))
-1: EX_LD(LOAD(ldd, %g1 + 0x08, %f2))
+ EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0))
+1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2))
subcc %o4, 0x40, %o4
- EX_LD(LOAD(ldd, %g1 + 0x10, %f4))
- EX_LD(LOAD(ldd, %g1 + 0x18, %f6))
- EX_LD(LOAD(ldd, %g1 + 0x20, %f8))
- EX_LD(LOAD(ldd, %g1 + 0x28, %f10))
- EX_LD(LOAD(ldd, %g1 + 0x30, %f12))
- EX_LD(LOAD(ldd, %g1 + 0x38, %f14))
+ EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4))
+ EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6))
+ EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8))
+ EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10))
+ EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12))
+ EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14))
faligndata %f0, %f2, %f16
- EX_LD(LOAD(ldd, %g1 + 0x40, %f0))
+ EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0))
faligndata %f2, %f4, %f18
add %g1, 0x40, %g1
faligndata %f4, %f6, %f20
@@ -229,14 +235,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
faligndata %f10, %f12, %f26
faligndata %f12, %f14, %f28
faligndata %f14, %f0, %f30
- EX_ST(STORE(std, %f16, %o0 + 0x00))
- EX_ST(STORE(std, %f18, %o0 + 0x08))
- EX_ST(STORE(std, %f20, %o0 + 0x10))
- EX_ST(STORE(std, %f22, %o0 + 0x18))
- EX_ST(STORE(std, %f24, %o0 + 0x20))
- EX_ST(STORE(std, %f26, %o0 + 0x28))
- EX_ST(STORE(std, %f28, %o0 + 0x30))
- EX_ST(STORE(std, %f30, %o0 + 0x38))
+ EX_ST_FP(STORE(std, %f16, %o0 + 0x00))
+ EX_ST_FP(STORE(std, %f18, %o0 + 0x08))
+ EX_ST_FP(STORE(std, %f20, %o0 + 0x10))
+ EX_ST_FP(STORE(std, %f22, %o0 + 0x18))
+ EX_ST_FP(STORE(std, %f24, %o0 + 0x20))
+ EX_ST_FP(STORE(std, %f26, %o0 + 0x28))
+ EX_ST_FP(STORE(std, %f28, %o0 + 0x30))
+ EX_ST_FP(STORE(std, %f30, %o0 + 0x38))
add %o0, 0x40, %o0
bne,pt %icc, 1b
LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S
index a6ae2ea04bf5..ecc5692fa2b4 100644
--- a/arch/sparc/lib/U1copy_from_user.S
+++ b/arch/sparc/lib/U1copy_from_user.S
@@ -11,6 +11,14 @@
.text; \
.align 4;
+#define EX_LD_FP(x) \
+98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, __retl_one_fp;\
+ .text; \
+ .align 4;
+
#define FUNC_NAME ___copy_from_user
#define LOAD(type,addr,dest) type##a [addr] %asi, dest
#define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_AIUS, dest
diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S
index f4b970eeb485..9eea392e44d4 100644
--- a/arch/sparc/lib/U1copy_to_user.S
+++ b/arch/sparc/lib/U1copy_to_user.S
@@ -11,6 +11,14 @@
.text; \
.align 4;
+#define EX_ST_FP(x) \
+98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, __retl_one_fp;\
+ .text; \
+ .align 4;
+
#define FUNC_NAME ___copy_to_user
#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS
#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_AIUS
diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S
index b67142b7768e..3e6209ebb7d7 100644
--- a/arch/sparc/lib/U1memcpy.S
+++ b/arch/sparc/lib/U1memcpy.S
@@ -25,10 +25,16 @@
#ifndef EX_LD
#define EX_LD(x) x
#endif
+#ifndef EX_LD_FP
+#define EX_LD_FP(x) x
+#endif
#ifndef EX_ST
#define EX_ST(x) x
#endif
+#ifndef EX_ST_FP
+#define EX_ST_FP(x) x
+#endif
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
@@ -73,8 +79,8 @@
faligndata %f8, %f9, %f62;
#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \
- EX_LD(LOAD_BLK(%src, %fdest)); \
- EX_ST(STORE_BLK(%fsrc, %dest)); \
+ EX_LD_FP(LOAD_BLK(%src, %fdest)); \
+ EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
add %src, 0x40, %src; \
subcc %len, 0x40, %len; \
be,pn %xcc, jmptgt; \
@@ -89,12 +95,12 @@
#define DO_SYNC membar #Sync;
#define STORE_SYNC(dest, fsrc) \
- EX_ST(STORE_BLK(%fsrc, %dest)); \
+ EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
add %dest, 0x40, %dest; \
DO_SYNC
#define STORE_JUMP(dest, fsrc, target) \
- EX_ST(STORE_BLK(%fsrc, %dest)); \
+ EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
add %dest, 0x40, %dest; \
ba,pt %xcc, target; \
nop;
@@ -103,7 +109,7 @@
subcc %left, 8, %left;\
bl,pn %xcc, 95f; \
faligndata %f0, %f1, %f48; \
- EX_ST(STORE(std, %f48, %dest)); \
+ EX_ST_FP(STORE(std, %f48, %dest)); \
add %dest, 8, %dest;
#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \
@@ -160,8 +166,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
and %g2, 0x38, %g2
1: subcc %g1, 0x1, %g1
- EX_LD(LOAD(ldub, %o1 + 0x00, %o3))
- EX_ST(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
+ EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
+ EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
bgu,pt %XCC, 1b
add %o1, 0x1, %o1
@@ -172,20 +178,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
be,pt %icc, 3f
alignaddr %o1, %g0, %o1
- EX_LD(LOAD(ldd, %o1, %f4))
-1: EX_LD(LOAD(ldd, %o1 + 0x8, %f6))
+ EX_LD_FP(LOAD(ldd, %o1, %f4))
+1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
add %o1, 0x8, %o1
subcc %g2, 0x8, %g2
faligndata %f4, %f6, %f0
- EX_ST(STORE(std, %f0, %o0))
+ EX_ST_FP(STORE(std, %f0, %o0))
be,pn %icc, 3f
add %o0, 0x8, %o0
- EX_LD(LOAD(ldd, %o1 + 0x8, %f4))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
add %o1, 0x8, %o1
subcc %g2, 0x8, %g2
faligndata %f6, %f4, %f0
- EX_ST(STORE(std, %f0, %o0))
+ EX_ST_FP(STORE(std, %f0, %o0))
bne,pt %icc, 1b
add %o0, 0x8, %o0
@@ -208,13 +214,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
add %g1, %GLOBAL_SPARE, %g1
subcc %o2, %g3, %o2
- EX_LD(LOAD_BLK(%o1, %f0))
+ EX_LD_FP(LOAD_BLK(%o1, %f0))
add %o1, 0x40, %o1
add %g1, %g3, %g1
- EX_LD(LOAD_BLK(%o1, %f16))
+ EX_LD_FP(LOAD_BLK(%o1, %f16))
add %o1, 0x40, %o1
sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
- EX_LD(LOAD_BLK(%o1, %f32))
+ EX_LD_FP(LOAD_BLK(%o1, %f32))
add %o1, 0x40, %o1
/* There are 8 instances of the unrolled loop,
@@ -426,28 +432,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
62: FINISH_VISCHUNK(o0, f44, f46, g3)
63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3)
-93: EX_LD(LOAD(ldd, %o1, %f2))
+93: EX_LD_FP(LOAD(ldd, %o1, %f2))
add %o1, 8, %o1
subcc %g3, 8, %g3
faligndata %f0, %f2, %f8
- EX_ST(STORE(std, %f8, %o0))
+ EX_ST_FP(STORE(std, %f8, %o0))
bl,pn %xcc, 95f
add %o0, 8, %o0
- EX_LD(LOAD(ldd, %o1, %f0))
+ EX_LD_FP(LOAD(ldd, %o1, %f0))
add %o1, 8, %o1
subcc %g3, 8, %g3
faligndata %f2, %f0, %f8
- EX_ST(STORE(std, %f8, %o0))
+ EX_ST_FP(STORE(std, %f8, %o0))
bge,pt %xcc, 93b
add %o0, 8, %o0
95: brz,pt %o2, 2f
mov %g1, %o1
-1: EX_LD(LOAD(ldub, %o1, %o3))
+1: EX_LD_FP(LOAD(ldub, %o1, %o3))
add %o1, 1, %o1
subcc %o2, 1, %o2
- EX_ST(STORE(stb, %o3, %o0))
+ EX_ST_FP(STORE(stb, %o3, %o0))
bne,pt %xcc, 1b
add %o0, 1, %o0
diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S
index b1acd1331c33..88ad73d86fe4 100644
--- a/arch/sparc/lib/U3copy_from_user.S
+++ b/arch/sparc/lib/U3copy_from_user.S
@@ -11,6 +11,14 @@
.text; \
.align 4;
+#define EX_LD_FP(x) \
+98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, __retl_one_fp;\
+ .text; \
+ .align 4;
+
#define FUNC_NAME U3copy_from_user
#define LOAD(type,addr,dest) type##a [addr] %asi, dest
#define EX_RETVAL(x) 0
diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S
index ef1e493afdfa..845139d75537 100644
--- a/arch/sparc/lib/U3copy_to_user.S
+++ b/arch/sparc/lib/U3copy_to_user.S
@@ -11,6 +11,14 @@
.text; \
.align 4;
+#define EX_ST_FP(x) \
+98: x; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, __retl_one_fp;\
+ .text; \
+ .align 4;
+
#define FUNC_NAME U3copy_to_user
#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS
#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_AIUS
diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S
index 7cae9cc6a204..491ee69e4995 100644
--- a/arch/sparc/lib/U3memcpy.S
+++ b/arch/sparc/lib/U3memcpy.S
@@ -24,10 +24,16 @@
#ifndef EX_LD
#define EX_LD(x) x
#endif
+#ifndef EX_LD_FP
+#define EX_LD_FP(x) x
+#endif
#ifndef EX_ST
#define EX_ST(x) x
#endif
+#ifndef EX_ST_FP
+#define EX_ST_FP(x) x
+#endif
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
@@ -120,8 +126,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
and %g2, 0x38, %g2
1: subcc %g1, 0x1, %g1
- EX_LD(LOAD(ldub, %o1 + 0x00, %o3))
- EX_ST(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
+ EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
+ EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
bgu,pt %XCC, 1b
add %o1, 0x1, %o1
@@ -132,20 +138,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
be,pt %icc, 3f
alignaddr %o1, %g0, %o1
- EX_LD(LOAD(ldd, %o1, %f4))
-1: EX_LD(LOAD(ldd, %o1 + 0x8, %f6))
+ EX_LD_FP(LOAD(ldd, %o1, %f4))
+1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
add %o1, 0x8, %o1
subcc %g2, 0x8, %g2
faligndata %f4, %f6, %f0
- EX_ST(STORE(std, %f0, %o0))
+ EX_ST_FP(STORE(std, %f0, %o0))
be,pn %icc, 3f
add %o0, 0x8, %o0
- EX_LD(LOAD(ldd, %o1 + 0x8, %f4))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
add %o1, 0x8, %o1
subcc %g2, 0x8, %g2
faligndata %f6, %f4, %f2
- EX_ST(STORE(std, %f2, %o0))
+ EX_ST_FP(STORE(std, %f2, %o0))
bne,pt %icc, 1b
add %o0, 0x8, %o0
@@ -155,25 +161,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
LOAD(prefetch, %o1 + 0x080, #one_read)
LOAD(prefetch, %o1 + 0x0c0, #one_read)
LOAD(prefetch, %o1 + 0x100, #one_read)
- EX_LD(LOAD(ldd, %o1 + 0x000, %f0))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0))
LOAD(prefetch, %o1 + 0x140, #one_read)
- EX_LD(LOAD(ldd, %o1 + 0x008, %f2))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
LOAD(prefetch, %o1 + 0x180, #one_read)
- EX_LD(LOAD(ldd, %o1 + 0x010, %f4))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
LOAD(prefetch, %o1 + 0x1c0, #one_read)
faligndata %f0, %f2, %f16
- EX_LD(LOAD(ldd, %o1 + 0x018, %f6))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
faligndata %f2, %f4, %f18
- EX_LD(LOAD(ldd, %o1 + 0x020, %f8))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
faligndata %f4, %f6, %f20
- EX_LD(LOAD(ldd, %o1 + 0x028, %f10))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
faligndata %f6, %f8, %f22
- EX_LD(LOAD(ldd, %o1 + 0x030, %f12))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
faligndata %f8, %f10, %f24
- EX_LD(LOAD(ldd, %o1 + 0x038, %f14))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
faligndata %f10, %f12, %f26
- EX_LD(LOAD(ldd, %o1 + 0x040, %f0))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE
add %o1, 0x40, %o1
@@ -184,26 +190,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
.align 64
1:
- EX_LD(LOAD(ldd, %o1 + 0x008, %f2))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
faligndata %f12, %f14, %f28
- EX_LD(LOAD(ldd, %o1 + 0x010, %f4))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
faligndata %f14, %f0, %f30
- EX_ST(STORE_BLK(%f16, %o0))
- EX_LD(LOAD(ldd, %o1 + 0x018, %f6))
+ EX_ST_FP(STORE_BLK(%f16, %o0))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
faligndata %f0, %f2, %f16
add %o0, 0x40, %o0
- EX_LD(LOAD(ldd, %o1 + 0x020, %f8))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
faligndata %f2, %f4, %f18
- EX_LD(LOAD(ldd, %o1 + 0x028, %f10))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
faligndata %f4, %f6, %f20
- EX_LD(LOAD(ldd, %o1 + 0x030, %f12))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
subcc %o3, 0x01, %o3
faligndata %f6, %f8, %f22
- EX_LD(LOAD(ldd, %o1 + 0x038, %f14))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
faligndata %f8, %f10, %f24
- EX_LD(LOAD(ldd, %o1 + 0x040, %f0))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
LOAD(prefetch, %o1 + 0x1c0, #one_read)
faligndata %f10, %f12, %f26
bg,pt %XCC, 1b
@@ -211,29 +217,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
/* Finally we copy the last full 64-byte block. */
2:
- EX_LD(LOAD(ldd, %o1 + 0x008, %f2))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
faligndata %f12, %f14, %f28
- EX_LD(LOAD(ldd, %o1 + 0x010, %f4))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
faligndata %f14, %f0, %f30
- EX_ST(STORE_BLK(%f16, %o0))
- EX_LD(LOAD(ldd, %o1 + 0x018, %f6))
+ EX_ST_FP(STORE_BLK(%f16, %o0))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
faligndata %f0, %f2, %f16
- EX_LD(LOAD(ldd, %o1 + 0x020, %f8))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
faligndata %f2, %f4, %f18
- EX_LD(LOAD(ldd, %o1 + 0x028, %f10))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
faligndata %f4, %f6, %f20
- EX_LD(LOAD(ldd, %o1 + 0x030, %f12))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
faligndata %f6, %f8, %f22
- EX_LD(LOAD(ldd, %o1 + 0x038, %f14))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
faligndata %f8, %f10, %f24
cmp %g1, 0
be,pt %XCC, 1f
add %o0, 0x40, %o0
- EX_LD(LOAD(ldd, %o1 + 0x040, %f0))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
1: faligndata %f10, %f12, %f26
faligndata %f12, %f14, %f28
faligndata %f14, %f0, %f30
- EX_ST(STORE_BLK(%f16, %o0))
+ EX_ST_FP(STORE_BLK(%f16, %o0))
add %o0, 0x40, %o0
add %o1, 0x40, %o1
membar #Sync
@@ -253,20 +259,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %o2, %g2, %o2
be,a,pt %XCC, 1f
- EX_LD(LOAD(ldd, %o1 + 0x00, %f0))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0))
-1: EX_LD(LOAD(ldd, %o1 + 0x08, %f2))
+1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2))
add %o1, 0x8, %o1
subcc %g2, 0x8, %g2
faligndata %f0, %f2, %f8
- EX_ST(STORE(std, %f8, %o0))
+ EX_ST_FP(STORE(std, %f8, %o0))
be,pn %XCC, 2f
add %o0, 0x8, %o0
- EX_LD(LOAD(ldd, %o1 + 0x08, %f0))
+ EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0))
add %o1, 0x8, %o1
subcc %g2, 0x8, %g2
faligndata %f2, %f0, %f8
- EX_ST(STORE(std, %f8, %o0))
+ EX_ST_FP(STORE(std, %f8, %o0))
bne,pn %XCC, 1b
add %o0, 0x8, %o0
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index 22564f5f2364..3e6e05a7c4c2 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -420,22 +420,9 @@ void bpf_jit_compile(struct bpf_prog *fp)
}
emit_reg_move(O7, r_saved_O7);
- switch (filter[0].code) {
- case BPF_RET | BPF_K:
- case BPF_LD | BPF_W | BPF_LEN:
- case BPF_LD | BPF_W | BPF_ABS:
- case BPF_LD | BPF_H | BPF_ABS:
- case BPF_LD | BPF_B | BPF_ABS:
- /* The first instruction sets the A register (or is
- * a "RET 'constant'")
- */
- break;
- default:
- /* Make sure we dont leak kernel information to the
- * user.
- */
+ /* Make sure we don't leak kernel information to the user. */
+ if (bpf_needs_clear_a(&filter[0]))
emit_clear(r_A); /* A = 0 */
- }
for (i = 0; i < flen; i++) {
unsigned int K = filter[i].k;
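
The helper that replaces the open-coded switch keeps the same decision: A is
cleared unless the filter's first instruction itself sets the A register (or
is a "RET 'constant'"). A hedged reconstruction of that predicate from the
deleted cases above (illustrative; the real bpf_needs_clear_a() lives in the
shared filter headers):

	#include <linux/filter.h>	/* struct sock_filter, BPF_* opcodes */

	static bool bpf_needs_clear_a_sketch(const struct sock_filter *first)
	{
		switch (first->code) {
		case BPF_RET | BPF_K:		/* "RET 'constant'" */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LD | BPF_W | BPF_ABS:
		case BPF_LD | BPF_H | BPF_ABS:
		case BPF_LD | BPF_B | BPF_ABS:
			return false;		/* first insn sets A */
		default:
			return true;		/* clear A: no kernel data leaks */
		}
	}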
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 106c21bd7f44..8ec7a4599c08 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -176,8 +176,6 @@ config NR_CPUS
smaller kernel memory footprint results from using a smaller
value on chips with fewer tiles.
-if TILEGX
-
choice
prompt "Kernel page size"
default PAGE_SIZE_64KB
@@ -188,8 +186,11 @@ choice
connections, etc., it may be better to select 16KB, which uses
memory more efficiently at some cost in TLB performance.
- Note that this option is TILE-Gx specific; currently
- TILEPro page size is set by rebuilding the hypervisor.
+ Note that for TILEPro, you must also rebuild the hypervisor
+ with a matching page size.
+
+config PAGE_SIZE_4KB
+ bool "4KB" if TILEPRO
config PAGE_SIZE_16KB
bool "16KB"
@@ -199,8 +200,6 @@ config PAGE_SIZE_64KB
endchoice
-endif
-
source "kernel/Kconfig.hz"
config KEXEC
diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h
index 0ccda3c425be..25d5899497be 100644
--- a/arch/tile/include/asm/cmpxchg.h
+++ b/arch/tile/include/asm/cmpxchg.h
@@ -127,8 +127,6 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n);
#endif
-#define tas(ptr) xchg((ptr), 1)
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_TILE_CMPXCHG_H */
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index a213a8d84a95..8eca6a0e1762 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -20,15 +20,17 @@
#include <arch/chip.h>
/* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */
-#if defined(CONFIG_PAGE_SIZE_16KB)
+#if defined(CONFIG_PAGE_SIZE_4KB) /* tilepro only */
+#define PAGE_SHIFT 12
+#define CTX_PAGE_FLAG HV_CTX_PG_SM_4K
+#elif defined(CONFIG_PAGE_SIZE_16KB)
#define PAGE_SHIFT 14
#define CTX_PAGE_FLAG HV_CTX_PG_SM_16K
#elif defined(CONFIG_PAGE_SIZE_64KB)
#define PAGE_SHIFT 16
#define CTX_PAGE_FLAG HV_CTX_PG_SM_64K
#else
-#define PAGE_SHIFT HV_LOG2_DEFAULT_PAGE_SIZE_SMALL
-#define CTX_PAGE_FLAG 0
+#error Page size not specified in Kconfig
#endif
#define HPAGE_SHIFT HV_LOG2_DEFAULT_PAGE_SIZE_LARGE
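
The shift/size pairs above follow directly from PAGE_SIZE = 1 << PAGE_SHIFT;
an illustrative compile-time check:

	/* Illustrative only: 1 << PAGE_SHIFT is the page size in bytes. */
	_Static_assert((1UL << 12) ==  4 * 1024, "PAGE_SHIFT 12 ->  4KB");
	_Static_assert((1UL << 14) == 16 * 1024, "PAGE_SHIFT 14 -> 16KB");
	_Static_assert((1UL << 16) == 64 * 1024, "PAGE_SHIFT 16 -> 64KB");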
diff --git a/arch/tile/kernel/perf_event.c b/arch/tile/kernel/perf_event.c
index bb509cee3b59..8767060d70fb 100644
--- a/arch/tile/kernel/perf_event.c
+++ b/arch/tile/kernel/perf_event.c
@@ -21,7 +21,7 @@
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2009 Jaswinder Singh Rajput
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
* Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
* Copyright (C) 2009 Google, Inc., Stephane Eranian
*/
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 25ed4098640e..e3abe6f3156d 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -131,7 +131,7 @@ export LDS_ELF_FORMAT := $(ELF_FORMAT)
# The wrappers will select whether using "malloc" or the kernel allocator.
LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc
-LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) -lrt
+LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt))
# Used by link-vmlinux.sh which has special support for um link
export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE)
diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c
index e697a4136707..e9f8445861dc 100644
--- a/arch/um/drivers/net_user.c
+++ b/arch/um/drivers/net_user.c
@@ -249,21 +249,23 @@ void close_addr(unsigned char *addr, unsigned char *netmask, void *arg)
char *split_if_spec(char *str, ...)
{
- char **arg, *end;
+ char **arg, *end, *ret = NULL;
va_list ap;
va_start(ap, str);
while ((arg = va_arg(ap, char **)) != NULL) {
if (*str == '\0')
- return NULL;
+ goto out;
end = strchr(str, ',');
if (end != str)
*arg = str;
if (end == NULL)
- return NULL;
+ goto out;
*end++ = '\0';
str = end;
}
+ ret = str;
+out:
va_end(ap);
- return str;
+ return ret;
}
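
With the fix, split_if_spec() finally distinguishes success from failure:
each comma-separated field is NUL-terminated and stored through the matching
char **, the unconsumed tail is returned on success, and NULL now
consistently means the spec ran out of fields. A hedged usage sketch
(variable names are made up for illustration; note the NULL sentinel that
terminates the vararg list):

	char spec[] = "aa:bb:cc:dd:ee:ff,192.168.0.1,rest";
	char *mac = NULL, *gate = NULL;
	char *tail = split_if_spec(spec, &mac, &gate, NULL);
	/* mac -> "aa:bb:cc:dd:ee:ff", gate -> "192.168.0.1", tail -> "rest" */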
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 57acbd67d85d..fc8be0e3a4ff 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -69,7 +69,7 @@ void do_signal(struct pt_regs *regs)
struct ksignal ksig;
int handled_sig = 0;
- while (get_signal(&ksig)) {
+ if (get_signal(&ksig)) {
handled_sig = 1;
/* Whee! Actually deliver the signal. */
handle_signal(&ksig, regs);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index db3622f22b61..258965d56beb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -349,6 +349,17 @@ config X86_FEATURE_NAMES
If in doubt, say Y.
+config X86_FAST_FEATURE_TESTS
+ bool "Fast CPU feature tests" if EMBEDDED
+ default y
+ ---help---
+ Some fast-paths in the kernel depend on the capabilities of the CPU.
+ Say Y here for the kernel to patch in the appropriate code at runtime
+ based on the capabilities of the CPU. The infrastructure for patching
+ code at runtime takes up some additional space; space-constrained
+ embedded systems may wish to say N here to produce smaller, slightly
+ slower code.
+
config X86_X2APIC
bool "Support x2apic"
depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST)
@@ -687,6 +698,14 @@ config PARAVIRT_SPINLOCKS
If you are unsure how to answer this question, answer Y.
+config QUEUED_LOCK_STAT
+ bool "Paravirt queued spinlock statistics"
+ depends on PARAVIRT_SPINLOCKS && DEBUG_FS && QUEUED_SPINLOCKS
+ ---help---
+ Enable the collection of statistical data on the slowpath
+ behavior of paravirtualized queued spinlocks and report
+ them on debugfs.
+
source "arch/x86/xen/Kconfig"
config KVM_GUEST
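
The X86_FAST_FEATURE_TESTS help text above describes patching feature tests
into the code at runtime; a hedged sketch of what such a fast-path test looks
like at a call site (function and field names are illustrative):

	/* Patched at boot into a plain branch when the option is enabled;
	 * otherwise it degrades to a runtime bit test like boot_cpu_has(). */
	static void save_fpregs_sketch(struct fpu *fpu)
	{
		if (static_cpu_has_safe(X86_FEATURE_XSAVE))
			copy_xregs_to_kernel(&fpu->state.xsave);  /* illustrative */
		else
			copy_fxregs_to_kernel(fpu);               /* illustrative */
	}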
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 137dfa96aa14..110253ce83af 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -69,7 +69,7 @@ config X86_PTDUMP_CORE
def_bool n
config X86_PTDUMP
- bool "Export kernel pagetable layout to userspace via debugfs"
+ tristate "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
select DEBUG_FS
select X86_PTDUMP_CORE
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 0033e96c3f09..9011a88353de 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -23,7 +23,6 @@
#include <stdarg.h>
#include <linux/types.h>
#include <linux/edd.h>
-#include <asm/boot.h>
#include <asm/setup.h>
#include "bitops.h"
#include "ctype.h"
diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c
index aa8a96b052e3..95c7a818c0ed 100644
--- a/arch/x86/boot/video-mode.c
+++ b/arch/x86/boot/video-mode.c
@@ -19,6 +19,8 @@
#include "video.h"
#include "vesa.h"
+#include <uapi/asm/boot.h>
+
/*
* Common variables
*/
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 05111bb8d018..77780e386e9b 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -13,6 +13,8 @@
* Select video mode
*/
+#include <uapi/asm/boot.h>
+
#include "boot.h"
#include "video.h"
#include "vesa.h"
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index 722bacea040e..8baaff5af0b5 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -125,7 +125,7 @@ static struct crypto_alg alg = {
static int __init chacha20_simd_mod_init(void)
{
- if (!cpu_has_ssse3)
+ if (!boot_cpu_has(X86_FEATURE_SSSE3))
return -ENODEV;
#ifdef CONFIG_AS_AVX2
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 81a595d75cf5..0e9871693f24 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -257,7 +257,7 @@ static int __init crc32c_intel_mod_init(void)
if (!x86_match_cpu(crc32c_cpu_id))
return -ENODEV;
#ifdef CONFIG_X86_64
- if (cpu_has_pclmulqdq) {
+ if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
alg.update = crc32c_pcl_intel_update;
alg.finup = crc32c_pcl_intel_finup;
alg.digest = crc32c_pcl_intel_digest;
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 3c71dd947c7b..e32206e09868 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -1,3 +1,5 @@
+#include <linux/jump_label.h>
+
/*
x86 function call convention, 64-bit:
@@ -232,3 +234,16 @@ For 32-bit we have the following conventions - kernel is built with
#endif /* CONFIG_X86_64 */
+/*
+ * This does 'call enter_from_user_mode' unless we can avoid it based on
+ * kernel config or using the static jump infrastructure.
+ */
+.macro CALL_enter_from_user_mode
+#ifdef CONFIG_CONTEXT_TRACKING
+#ifdef HAVE_JUMP_LABEL
+ STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
+#endif
+ call enter_from_user_mode
+.Lafter_call_\@:
+#endif
+.endm
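
In C terms, the macro above behaves roughly like the sketch below. The
static-key name matches the symbol the macro references, but treat the exact
API spelling as illustrative for this kernel version:

	#ifdef CONFIG_CONTEXT_TRACKING
		/* With jump labels, this test is patched to a NOP or JMP at
		 * runtime, so the call is skipped entirely while context
		 * tracking is disabled. */
		if (static_key_false(&context_tracking_enabled))
			enter_from_user_mode();
	#endif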
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index a89fdbc1f0be..03663740c866 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -421,7 +421,7 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
regs->ip = landing_pad;
/*
- * Fetch ECX from where the vDSO stashed it.
+ * Fetch EBP from where the vDSO stashed it.
*
* WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
*/
@@ -432,10 +432,10 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
* Micro-optimization: the pointer we're following is explicitly
* 32 bits, so it can't be out of range.
*/
- __get_user(*(u32 *)&regs->cx,
+ __get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp)
#else
- get_user(*(u32 *)&regs->cx,
+ get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp)
#endif
) {
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 3eb572ed3d7a..77d8c5112900 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -292,7 +292,7 @@ ENTRY(entry_SYSENTER_32)
movl TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp:
pushl $__USER_DS /* pt_regs->ss */
- pushl %ecx /* pt_regs->cx */
+ pushl %ebp /* pt_regs->sp (stashed in bp) */
pushfl /* pt_regs->flags (except IF = 0) */
orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
pushl $__USER_CS /* pt_regs->cs */
@@ -308,8 +308,9 @@ sysenter_past_esp:
movl %esp, %eax
call do_fast_syscall_32
- testl %eax, %eax
- jz .Lsyscall_32_done
+ /* XEN PV guests always use IRET path */
+ ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+ "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
/* Opportunistic SYSEXIT */
TRACE_IRQS_ON /* User mode traces as IRQs on. */
@@ -328,7 +329,8 @@ sysenter_past_esp:
* Return back to the vDSO, which will pop ecx and edx.
* Don't bother with DS and ES (they already contain __USER_DS).
*/
- ENABLE_INTERRUPTS_SYSEXIT
+ sti
+ sysexit
.pushsection .fixup, "ax"
2: movl $0, PT_FS(%esp)
@@ -551,11 +553,6 @@ ENTRY(native_iret)
iret
_ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)
-
-ENTRY(native_irq_enable_sysexit)
- sti
- sysexit
-END(native_irq_enable_sysexit)
#endif
ENTRY(overflow)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 53616ca03244..9d34d3cfceb6 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -509,9 +509,18 @@ END(irq_entries_start)
* tracking that we're in kernel mode.
*/
SWAPGS
-#ifdef CONFIG_CONTEXT_TRACKING
- call enter_from_user_mode
-#endif
+
+ /*
+ * We need to tell lockdep that IRQs are off. We can't do this until
+ * we fix gsbase, and we should do it before enter_from_user_mode
+ * (which can take locks). Since TRACE_IRQS_OFF is idempotent,
+ * the simplest way to handle it is to just call it twice if
+ * we enter from user mode. There's no reason to optimize this since
+ * TRACE_IRQS_OFF is a no-op if lockdep is off.
+ */
+ TRACE_IRQS_OFF
+
+ CALL_enter_from_user_mode
1:
/*
@@ -1049,12 +1058,16 @@ ENTRY(error_entry)
SWAPGS
.Lerror_entry_from_usermode_after_swapgs:
-#ifdef CONFIG_CONTEXT_TRACKING
- call enter_from_user_mode
-#endif
+ /*
+ * We need to tell lockdep that IRQs are off. We can't do this until
+ * we fix gsbase, and we should do it before enter_from_user_mode
+ * (which can take locks).
+ */
+ TRACE_IRQS_OFF
+ CALL_enter_from_user_mode
+ ret
.Lerror_entry_done:
-
TRACE_IRQS_OFF
ret
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index c3201830a85e..ff1c6d61f332 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -18,13 +18,6 @@
.section .entry.text, "ax"
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_usergs_sysret32)
- swapgs
- sysretl
-ENDPROC(native_usergs_sysret32)
-#endif
-
/*
* 32-bit SYSENTER instruction entry.
*
@@ -63,7 +56,7 @@ ENTRY(entry_SYSENTER_compat)
/* Construct struct pt_regs on stack */
pushq $__USER32_DS /* pt_regs->ss */
- pushq %rcx /* pt_regs->sp */
+ pushq %rbp /* pt_regs->sp (stashed in bp) */
/*
* Push flags. This is nasty. First, interrupts are currently
@@ -82,14 +75,14 @@ ENTRY(entry_SYSENTER_compat)
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx (will be overwritten) */
+ pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 = 0 */
pushq %r8 /* pt_regs->r9 = 0 */
pushq %r8 /* pt_regs->r10 = 0 */
pushq %r8 /* pt_regs->r11 = 0 */
pushq %rbx /* pt_regs->rbx */
- pushq %rbp /* pt_regs->rbp */
+ pushq %rbp /* pt_regs->rbp (will be overwritten) */
pushq %r8 /* pt_regs->r12 = 0 */
pushq %r8 /* pt_regs->r13 = 0 */
pushq %r8 /* pt_regs->r14 = 0 */
@@ -103,15 +96,15 @@ ENTRY(entry_SYSENTER_compat)
* This needs to happen before enabling interrupts so that
* we don't get preempted with NT set.
*
- * NB.: sysenter_fix_flags is a label with the code under it moved
+ * NB.: .Lsysenter_fix_flags is a label with the code under it moved
* out-of-line as an optimization: NT is unlikely to be set in the
* majority of the cases and instead of polluting the I$ unnecessarily,
* we're keeping that code behind a branch which will predict as
* not-taken and therefore its instructions won't be fetched.
*/
testl $X86_EFLAGS_NT, EFLAGS(%rsp)
- jnz sysenter_fix_flags
-sysenter_flags_fixed:
+ jnz .Lsysenter_fix_flags
+.Lsysenter_flags_fixed:
/*
* User mode is traced as though IRQs are on, and SYSENTER
@@ -121,14 +114,15 @@ sysenter_flags_fixed:
movq %rsp, %rdi
call do_fast_syscall_32
- testl %eax, %eax
- jz .Lsyscall_32_done
+ /* XEN PV guests always use IRET path */
+ ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+ "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
jmp sysret32_from_system_call
-sysenter_fix_flags:
+.Lsysenter_fix_flags:
pushq $X86_EFLAGS_FIXED
popfq
- jmp sysenter_flags_fixed
+ jmp .Lsysenter_flags_fixed
ENDPROC(entry_SYSENTER_compat)
/*
@@ -178,7 +172,7 @@ ENTRY(entry_SYSCALL_compat)
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx (will be overwritten) */
+ pushq %rbp /* pt_regs->cx (stashed in bp) */
pushq $-ENOSYS /* pt_regs->ax */
xorq %r8,%r8
pushq %r8 /* pt_regs->r8 = 0 */
@@ -186,7 +180,7 @@ ENTRY(entry_SYSCALL_compat)
pushq %r8 /* pt_regs->r10 = 0 */
pushq %r8 /* pt_regs->r11 = 0 */
pushq %rbx /* pt_regs->rbx */
- pushq %rbp /* pt_regs->rbp */
+ pushq %rbp /* pt_regs->rbp (will be overwritten) */
pushq %r8 /* pt_regs->r12 = 0 */
pushq %r8 /* pt_regs->r13 = 0 */
pushq %r8 /* pt_regs->r14 = 0 */
@@ -200,8 +194,9 @@ ENTRY(entry_SYSCALL_compat)
movq %rsp, %rdi
call do_fast_syscall_32
- testl %eax, %eax
- jz .Lsyscall_32_done
+ /* XEN PV guests always use IRET path */
+ ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+ "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
/* Opportunistic SYSRET */
sysret32_from_system_call:
@@ -236,7 +231,8 @@ sysret32_from_system_call:
xorq %r9, %r9
xorq %r10, %r10
movq RSP-ORIG_RAX(%rsp), %rsp
- USERGS_SYSRET32
+ swapgs
+ sysretl
END(entry_SYSCALL_compat)
/*
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index ca94fa649251..8602f06c759f 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -17,8 +17,10 @@
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
+#include <asm/pvclock.h>
#include <linux/math64.h>
#include <linux/time.h>
+#include <linux/kernel.h>
#define gtod (&VVAR(vsyscall_gtod_data))
@@ -36,12 +38,12 @@ static notrace cycle_t vread_hpet(void)
}
#endif
-#ifndef BUILD_VDSO32
+#ifdef CONFIG_PARAVIRT_CLOCK
+extern u8 pvclock_page
+ __attribute__((visibility("hidden")));
+#endif
-#include <linux/kernel.h>
-#include <asm/vsyscall.h>
-#include <asm/fixmap.h>
-#include <asm/pvclock.h>
+#ifndef BUILD_VDSO32
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
@@ -60,75 +62,6 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
return ret;
}
-#ifdef CONFIG_PARAVIRT_CLOCK
-
-static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
-{
- const struct pvclock_vsyscall_time_info *pvti_base;
- int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
- int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
-
- BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
-
- pvti_base = (struct pvclock_vsyscall_time_info *)
- __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
-
- return &pvti_base[offset];
-}
-
-static notrace cycle_t vread_pvclock(int *mode)
-{
- const struct pvclock_vsyscall_time_info *pvti;
- cycle_t ret;
- u64 last;
- u32 version;
- u8 flags;
- unsigned cpu, cpu1;
-
-
- /*
- * Note: hypervisor must guarantee that:
- * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
- * 2. that per-CPU pvclock time info is updated if the
- * underlying CPU changes.
- * 3. that version is increased whenever underlying CPU
- * changes.
- *
- */
- do {
- cpu = __getcpu() & VGETCPU_CPU_MASK;
- /* TODO: We can put vcpu id into higher bits of pvti.version.
- * This will save a couple of cycles by getting rid of
- * __getcpu() calls (Gleb).
- */
-
- pvti = get_pvti(cpu);
-
- version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
-
- /*
- * Test we're still on the cpu as well as the version.
- * We could have been migrated just after the first
- * vgetcpu but before fetching the version, so we
- * wouldn't notice a version change.
- */
- cpu1 = __getcpu() & VGETCPU_CPU_MASK;
- } while (unlikely(cpu != cpu1 ||
- (pvti->pvti.version & 1) ||
- pvti->pvti.version != version));
-
- if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
- *mode = VCLOCK_NONE;
-
- /* refer to tsc.c read_tsc() comment for rationale */
- last = gtod->cycle_last;
-
- if (likely(ret >= last))
- return ret;
-
- return last;
-}
-#endif
#else
@@ -162,15 +95,77 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
return ret;
}
+#endif
+
#ifdef CONFIG_PARAVIRT_CLOCK
+static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
+{
+ return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
+}
static notrace cycle_t vread_pvclock(int *mode)
{
- *mode = VCLOCK_NONE;
- return 0;
-}
-#endif
+ const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
+ cycle_t ret;
+ u64 tsc, pvti_tsc;
+ u64 last, delta, pvti_system_time;
+ u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
+
+ /*
+ * Note: The kernel and hypervisor must guarantee that cpu ID
+ * number maps 1:1 to per-CPU pvclock time info.
+ *
+ * Because the hypervisor is entirely unaware of guest userspace
+ * preemption, it cannot guarantee that per-CPU pvclock time
+ * info is updated if the underlying CPU changes, or that the
+ * version is increased whenever the underlying CPU changes.
+ *
+ * On KVM, we are guaranteed that pvti updates for any vCPU are
+ * atomic as seen by *all* vCPUs. This is an even stronger
+ * guarantee than we get with a normal seqlock.
+ *
+ * On Xen, we don't appear to have that guarantee, but Xen still
+ * supplies a valid seqlock using the version field.
+ *
+ * We only do pvclock vdso timing at all if
+ * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
+ * mean that all vCPUs have matching pvti and that the TSC is
+ * synced, so we can just look at vCPU 0's pvti.
+ */
+ if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
+ *mode = VCLOCK_NONE;
+ return 0;
+ }
+
+ do {
+ version = pvti->version;
+
+ smp_rmb();
+
+ tsc = rdtsc_ordered();
+ pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
+ pvti_tsc_shift = pvti->tsc_shift;
+ pvti_system_time = pvti->system_time;
+ pvti_tsc = pvti->tsc_timestamp;
+
+ /* Make sure that the version double-check is last. */
+ smp_rmb();
+ } while (unlikely((version & 1) || version != pvti->version));
+
+ delta = tsc - pvti_tsc;
+ ret = pvti_system_time +
+ pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
+ pvti_tsc_shift);
+
+ /* refer to vread_tsc() comment for rationale */
+ last = gtod->cycle_last;
+
+ if (likely(ret >= last))
+ return ret;
+
+ return last;
+}
#endif
notrace static cycle_t vread_tsc(void)
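
The computation above reduces to scaling a TSC delta by a 32.32 fixed-point
multiplier taken from the pvti. A hedged sketch of what pvclock_scale_delta()
computes (names follow the call site; the kernel avoids __int128 by using a
mul-and-shift helper):

	#include <stdint.h>

	static uint64_t scale_delta_sketch(uint64_t delta, uint32_t mul_frac,
					   int8_t shift)
	{
		/* Apply the per-pvti shift first... */
		if (shift < 0)
			delta >>= -shift;
		else
			delta <<= shift;
		/* ...then multiply by the 32.32 fraction, keeping the
		 * high 64 bits of the 96-bit product. */
		return (uint64_t)(((unsigned __int128)delta * mul_frac) >> 32);
	}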
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index de2c921025f5..4158acc17df0 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -25,7 +25,7 @@ SECTIONS
* segment.
*/
- vvar_start = . - 2 * PAGE_SIZE;
+ vvar_start = . - 3 * PAGE_SIZE;
vvar_page = vvar_start;
/* Place all vvars at the offsets in asm/vvar.h. */
@@ -36,6 +36,7 @@ SECTIONS
#undef EMIT_VVAR
hpet_page = vvar_start + PAGE_SIZE;
+ pvclock_page = vvar_start + 2 * PAGE_SIZE;
. = SIZEOF_HEADERS;
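
The net effect of the two changed lines above, for reference:

	/* Resulting vvar area layout (three pages, per the linker script):
	 *   vvar_start + 0 * PAGE_SIZE : vvar_page
	 *   vvar_start + 1 * PAGE_SIZE : hpet_page
	 *   vvar_start + 2 * PAGE_SIZE : pvclock_page   (new in this change)
	 */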
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 785d9922b106..491020b2826d 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -73,6 +73,7 @@ enum {
sym_vvar_start,
sym_vvar_page,
sym_hpet_page,
+ sym_pvclock_page,
sym_VDSO_FAKE_SECTION_TABLE_START,
sym_VDSO_FAKE_SECTION_TABLE_END,
};
@@ -80,6 +81,7 @@ enum {
const int special_pages[] = {
sym_vvar_page,
sym_hpet_page,
+ sym_pvclock_page,
};
struct vdso_sym {
@@ -91,6 +93,7 @@ struct vdso_sym required_syms[] = {
[sym_vvar_start] = {"vvar_start", true},
[sym_vvar_page] = {"vvar_page", true},
[sym_hpet_page] = {"hpet_page", true},
+ [sym_pvclock_page] = {"pvclock_page", true},
[sym_VDSO_FAKE_SECTION_TABLE_START] = {
"VDSO_FAKE_SECTION_TABLE_START", false
},
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 93bd8452383f..3a1d9297074b 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -1,5 +1,5 @@
/*
- * Code for the vDSO. This version uses the old int $0x80 method.
+ * AT_SYSINFO entry point
*/
#include <asm/dwarf2.h>
@@ -21,35 +21,67 @@ __kernel_vsyscall:
/*
* Reshuffle regs so that all of any of the entry instructions
* will preserve enough state.
+ *
+ * A really nice entry sequence would be:
+ * pushl %edx
+ * pushl %ecx
+ * movl %esp, %ecx
+ *
+ * Unfortunately, naughty Android versions between July and December
+ * 2015 actually hardcode the traditional Linux SYSENTER entry
+ * sequence. That is severely broken for a number of reasons (ask
+ * anyone with an AMD CPU, for example). Nonetheless, we try to keep
+ * it working approximately as well as it ever worked.
+ *
+ * This link may elucidate some of the history:
+ * https://android-review.googlesource.com/#/q/Iac3295376d61ef83e713ac9b528f3b50aa780cd7
+ * Personally, I find it hard to understand what's going on there.
+ *
+ * Note to future user developers: DO NOT USE SYSENTER IN YOUR CODE.
+ * Execute an indirect call to the address in the AT_SYSINFO auxv
+ * entry. That is the ONLY correct way to make a fast 32-bit system
+ * call on Linux. (Open-coding int $0x80 is also fine, but it's
+ * slow.)
*/
+ pushl %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ecx, 0
pushl %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx, 0
- pushl %ecx
+ pushl %ebp
CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET ecx, 0
- movl %esp, %ecx
+ CFI_REL_OFFSET ebp, 0
+
+ #define SYSENTER_SEQUENCE "movl %esp, %ebp; sysenter"
+ #define SYSCALL_SEQUENCE "movl %ecx, %ebp; syscall"
#ifdef CONFIG_X86_64
/* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
- ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \
- "syscall", X86_FEATURE_SYSCALL32
+ ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \
+ SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32
#else
- ALTERNATIVE "", "sysenter", X86_FEATURE_SEP
+ ALTERNATIVE "", SYSENTER_SEQUENCE, X86_FEATURE_SEP
#endif
/* Enter using int $0x80 */
- movl (%esp), %ecx
int $0x80
GLOBAL(int80_landing_pad)
- /* Restore ECX and EDX in case they were clobbered. */
- popl %ecx
- CFI_RESTORE ecx
+ /*
+ * Restore EDX and ECX in case they were clobbered. EBP is not
+ * clobbered (the kernel restores it), but it's cleaner and
+ * probably faster to pop it than to adjust ESP using addl.
+ */
+ popl %ebp
+ CFI_RESTORE ebp
CFI_ADJUST_CFA_OFFSET -4
popl %edx
CFI_RESTORE edx
CFI_ADJUST_CFA_OFFSET -4
+ popl %ecx
+ CFI_RESTORE ecx
+ CFI_ADJUST_CFA_OFFSET -4
ret
CFI_ENDPROC
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 64df47148160..b8f69e264ac4 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -12,6 +12,7 @@
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
+#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
@@ -100,6 +101,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
.name = "[vvar]",
.pages = no_pages,
};
+ struct pvclock_vsyscall_time_info *pvti;
if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack,
@@ -169,6 +171,18 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
}
#endif
+ pvti = pvclock_pvti_cpu0_va();
+ if (pvti && image->sym_pvclock_page) {
+ ret = remap_pfn_range(vma,
+ text_start + image->sym_pvclock_page,
+ __pa(pvti) >> PAGE_SHIFT,
+ PAGE_SIZE,
+ PAGE_READONLY);
+
+ if (ret)
+ goto up_fail;
+ }
+
up_fail:
if (ret)
current->mm->context.vdso = NULL;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index a30316bf801a..c80f6b6f3da2 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -23,6 +23,11 @@
#define APIC_VERBOSE 1
#define APIC_DEBUG 2
+/* Macros for apic_extnmi which controls external NMI masking */
+#define APIC_EXTNMI_BSP 0 /* Default */
+#define APIC_EXTNMI_ALL 1
+#define APIC_EXTNMI_NONE 2
+
/*
* Define the default level of output to be very little
* This can be turned up by using apic=verbose for more
@@ -303,6 +308,7 @@ struct apic {
unsigned int *apicid);
/* ipi */
+ void (*send_IPI)(int cpu, int vector);
void (*send_IPI_mask)(const struct cpumask *mask, int vector);
void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
int vector);
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index ae5fb83e6d91..3e8674288198 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -3,7 +3,6 @@
#include <linux/compiler.h>
#include <linux/types.h>
-#include <asm/processor.h>
#include <asm/alternative.h>
#include <asm/cmpxchg.h>
#include <asm/rmwcc.h>
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index a11c30b77fb5..a984111135b1 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -3,7 +3,6 @@
#include <linux/compiler.h>
#include <linux/types.h>
-#include <asm/processor.h>
//#include <asm/cmpxchg.h>
/* An 64bit atomic type */
diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h
index 0d467b338835..a8303ebe089f 100644
--- a/arch/x86/include/asm/calgary.h
+++ b/arch/x86/include/asm/calgary.h
@@ -31,7 +31,7 @@
#include <asm/types.h>
struct iommu_table {
- struct cal_chipset_ops *chip_ops; /* chipset specific funcs */
+ const struct cal_chipset_ops *chip_ops; /* chipset specific funcs */
unsigned long it_base; /* mapped address of tce table */
unsigned long it_hint; /* Hint for next alloc */
unsigned long *it_map; /* A simple allocation bitmap for now */
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index f7e142926481..e4959d023af8 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -109,6 +109,6 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
#endif
-#define system_has_cmpxchg_double() cpu_has_cx8
+#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX8)
#endif /* _ASM_X86_CMPXCHG_32_H */
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 1af94697aae5..caa23a34c963 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -18,6 +18,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
cmpxchg_local((ptr), (o), (n)); \
})
-#define system_has_cmpxchg_double() cpu_has_cx16
+#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16)
#endif /* _ASM_X86_CMPXCHG_64_H */
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index bf2caa1dedc5..678637ad7476 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -36,4 +36,7 @@ extern int _debug_hotplug_cpu(int cpu, int action);
int mwait_usable(const struct cpuinfo_x86 *);
+unsigned int x86_family(unsigned int sig);
+unsigned int x86_model(unsigned int sig);
+unsigned int x86_stepping(unsigned int sig);
#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index e4f8010f22e0..7ad8c9464297 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -12,7 +12,7 @@
#include <asm/disabled-features.h>
#endif
-#define NCAPINTS 14 /* N 32-bit words worth of info */
+#define NCAPINTS 16 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@@ -181,22 +181,17 @@
/*
* Auxiliary flags: Linux defined - For features scattered in various
- * CPUID levels like 0x6, 0xA etc, word 7
+ * CPUID levels like 0x6, 0xA etc, word 7.
+ *
+ * Reuse free bits when adding new feature flags!
*/
-#define X86_FEATURE_IDA ( 7*32+ 0) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT ( 7*32+ 1) /* Always Running APIC Timer */
+
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-#define X86_FEATURE_PLN ( 7*32+ 5) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS ( 7*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_DTHERM ( 7*32+ 7) /* Digital Thermal Sensor */
+
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_HWP ( 7*32+ 10) /* "hwp" Intel HWP */
-#define X86_FEATURE_HWP_NOTIFY ( 7*32+ 11) /* Intel HWP_NOTIFY */
-#define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */
-#define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */
-#define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */
+
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
/* Virtualization flags: Linux defined, word 8 */
@@ -205,17 +200,9 @@
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
-#define X86_FEATURE_NPT ( 8*32+ 5) /* AMD Nested Page Table support */
-#define X86_FEATURE_LBRV ( 8*32+ 6) /* AMD LBR Virtualization support */
-#define X86_FEATURE_SVML ( 8*32+ 7) /* "svm_lock" AMD SVM locking MSR */
-#define X86_FEATURE_NRIPS ( 8*32+ 8) /* "nrip_save" AMD SVM next_rip save */
-#define X86_FEATURE_TSCRATEMSR ( 8*32+ 9) /* "tsc_scale" AMD TSC scaling support */
-#define X86_FEATURE_VMCBCLEAN ( 8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID ( 8*32+11) /* AMD flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
+
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
+#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
@@ -258,6 +245,30 @@
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
+/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
+#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
+#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
+#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
+#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+
+/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
+#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
+#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
+#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
+
/*
* BUG word(s)
*/
@@ -278,6 +289,26 @@
#include <asm/asm.h>
#include <linux/bitops.h>
+enum cpuid_leafs
+{
+ CPUID_1_EDX = 0,
+ CPUID_8000_0001_EDX,
+ CPUID_8086_0001_EDX,
+ CPUID_LNX_1,
+ CPUID_1_ECX,
+ CPUID_C000_0001_EDX,
+ CPUID_8000_0001_ECX,
+ CPUID_LNX_2,
+ CPUID_LNX_3,
+ CPUID_7_0_EBX,
+ CPUID_D_1_EAX,
+ CPUID_F_0_EDX,
+ CPUID_F_1_EDX,
+ CPUID_8000_0008_EBX,
+ CPUID_6_EAX,
+ CPUID_8000_000A_EDX,
+};
+
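
Every flag above encodes word * 32 + bit, and the new enum names each word:
X86_FEATURE_HWP is (14*32+ 7), i.e. bit 7 of word 14, which the enum labels
CPUID_6_EAX. An illustrative decomposition:

	/* Illustrative only: recover word and bit from a feature number. */
	#define FEATURE_WORD(f)	((f) / 32)	/* X86_FEATURE_HWP -> 14 */
	#define FEATURE_BIT(f)	((f) % 32)	/* X86_FEATURE_HWP ->  7 */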
#ifdef CONFIG_X86_FEATURE_NAMES
extern const char * const x86_cap_flags[NCAPINTS*32];
extern const char * const x86_power_flags[32];
@@ -355,60 +386,31 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
} while (0)
#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
-#define cpu_has_de boot_cpu_has(X86_FEATURE_DE)
#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE)
#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC)
#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE)
#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC)
-#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP)
-#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR)
-#define cpu_has_mmx boot_cpu_has(X86_FEATURE_MMX)
#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR)
#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM)
#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2)
-#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3)
-#define cpu_has_ssse3 boot_cpu_has(X86_FEATURE_SSSE3)
#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES)
#define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX)
#define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2)
-#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
-#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)
-#define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE)
-#define cpu_has_xstore_enabled boot_cpu_has(X86_FEATURE_XSTORE_EN)
-#define cpu_has_xcrypt boot_cpu_has(X86_FEATURE_XCRYPT)
-#define cpu_has_xcrypt_enabled boot_cpu_has(X86_FEATURE_XCRYPT_EN)
-#define cpu_has_ace2 boot_cpu_has(X86_FEATURE_ACE2)
-#define cpu_has_ace2_enabled boot_cpu_has(X86_FEATURE_ACE2_EN)
-#define cpu_has_phe boot_cpu_has(X86_FEATURE_PHE)
-#define cpu_has_phe_enabled boot_cpu_has(X86_FEATURE_PHE_EN)
-#define cpu_has_pmm boot_cpu_has(X86_FEATURE_PMM)
-#define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN)
-#define cpu_has_ds boot_cpu_has(X86_FEATURE_DS)
-#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS)
#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH)
-#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS)
#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES)
#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT)
-#define cpu_has_xmm4_1 boot_cpu_has(X86_FEATURE_XMM4_1)
-#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2)
#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
-#define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT)
#define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES)
#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
-#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
-#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
-#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
-#define cpu_has_perfctr_l2 boot_cpu_has(X86_FEATURE_PERFCTR_L2)
-#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
-#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
-#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
-#define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT)
-#define cpu_has_bpext boot_cpu_has(X86_FEATURE_BPEXT)
-
-#if __GNUC__ >= 4
+/*
+ * Do not add any more of those clumsy macros - use static_cpu_has_safe() for
+ * fast paths and boot_cpu_has() otherwise!
+ */
+
+#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
extern void warn_pre_alternatives(void);
extern bool __static_cpu_has_safe(u16 bit);
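The convenience macros removed above all convert mechanically to direct capability queries. A minimal sketch of the pattern, using a feature bit this same series converts later (see the xor_32.h hunk below):

	/* Before: the removed convenience macro */
	if (cpu_has_mmx)
		xor_speed(&xor_block_pII_mmx);

	/* After: query the boot CPU's capability bits directly */
	if (boot_cpu_has(X86_FEATURE_MMX))
		xor_speed(&xor_block_pII_mmx);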
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index f80d70009ff8..6d7d0e52ed5a 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -19,7 +19,6 @@
#include <asm/acpi.h>
#include <asm/apicdef.h>
#include <asm/page.h>
-#include <asm/pvclock.h>
#ifdef CONFIG_X86_32
#include <linux/threads.h>
#include <asm/kmap_types.h>
@@ -72,10 +71,6 @@ enum fixed_addresses {
#ifdef CONFIG_X86_VSYSCALL_EMULATION
VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,
#endif
-#ifdef CONFIG_PARAVIRT_CLOCK
- PVCLOCK_FIXMAP_BEGIN,
- PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
-#endif
#endif
FIX_DBGP_BASE,
FIX_EARLYCON_MEM_BASE,
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 3c3550c3a4a3..eadcdd5bb946 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -224,18 +224,67 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
-/* xstate instruction fault handler: */
-#define xstate_fault(__err) \
- \
- ".section .fixup,\"ax\"\n" \
- \
- "3: movl $-2,%[_err]\n" \
- " jmp 2b\n" \
- \
- ".previous\n" \
- \
- _ASM_EXTABLE(1b, 3b) \
- : [_err] "=r" (__err)
+#define XSTATE_OP(op, st, lmask, hmask, err) \
+ asm volatile("1:" op "\n\t" \
+ "xor %[err], %[err]\n" \
+ "2:\n\t" \
+ ".pushsection .fixup,\"ax\"\n\t" \
+ "3: movl $-2,%[err]\n\t" \
+ "jmp 2b\n\t" \
+ ".popsection\n\t" \
+ _ASM_EXTABLE(1b, 3b) \
+ : [err] "=r" (err) \
+ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : "memory")
+
+/*
+ * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
+ * format and supervisor states in addition to modified optimization in
+ * XSAVEOPT.
+ *
+ * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
+ * supports modified optimization which is not supported by XSAVE.
+ *
+ * We use XSAVE as a fallback.
+ *
+ * The 661 label is defined in the ALTERNATIVE* macros as the address of the
+ * original instruction which gets replaced. We need to use it here as the
+ * address of the instruction at which we might get an exception.
+ */
+#define XSTATE_XSAVE(st, lmask, hmask, err) \
+ asm volatile(ALTERNATIVE_2(XSAVE, \
+ XSAVEOPT, X86_FEATURE_XSAVEOPT, \
+ XSAVES, X86_FEATURE_XSAVES) \
+ "\n" \
+ "xor %[err], %[err]\n" \
+ "3:\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "4: movl $-2, %[err]\n" \
+ "jmp 3b\n" \
+ ".popsection\n" \
+ _ASM_EXTABLE(661b, 4b) \
+ : [err] "=r" (err) \
+ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : "memory")
+
+/*
+ * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
+ * XSAVE area format.
+ */
+#define XSTATE_XRESTORE(st, lmask, hmask, err) \
+ asm volatile(ALTERNATIVE(XRSTOR, \
+ XRSTORS, X86_FEATURE_XSAVES) \
+ "\n" \
+ "xor %[err], %[err]\n" \
+ "3:\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "4: movl $-2, %[err]\n" \
+ "jmp 3b\n" \
+ ".popsection\n" \
+ _ASM_EXTABLE(661b, 4b) \
+ : [err] "=r" (err) \
+ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : "memory")
/*
* This function is called only during boot time when x86 caps are not set
@@ -246,22 +295,14 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
u64 mask = -1;
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err = 0;
+ int err;
WARN_ON(system_state != SYSTEM_BOOTING);
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- asm volatile("1:"XSAVES"\n\t"
- "2:\n\t"
- xstate_fault(err)
- : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
- : "memory");
+ if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+ XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
else
- asm volatile("1:"XSAVE"\n\t"
- "2:\n\t"
- xstate_fault(err)
- : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
- : "memory");
+ XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
/* We should never fault when copying to a kernel buffer: */
WARN_ON_FPU(err);
@@ -276,22 +317,14 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
u64 mask = -1;
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err = 0;
+ int err;
WARN_ON(system_state != SYSTEM_BOOTING);
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- asm volatile("1:"XRSTORS"\n\t"
- "2:\n\t"
- xstate_fault(err)
- : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
- : "memory");
+ if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+ XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
- asm volatile("1:"XRSTOR"\n\t"
- "2:\n\t"
- xstate_fault(err)
- : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
- : "memory");
+ XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
/* We should never fault when copying from a kernel buffer: */
WARN_ON_FPU(err);
@@ -305,33 +338,11 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
u64 mask = -1;
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err = 0;
+ int err;
WARN_ON(!alternatives_patched);
- /*
- * If xsaves is enabled, xsaves replaces xsaveopt because
- * it supports compact format and supervisor states in addition to
- * modified optimization in xsaveopt.
- *
- * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave
- * because xsaveopt supports modified optimization which is not
- * supported by xsave.
- *
- * If none of xsaves and xsaveopt is enabled, use xsave.
- */
- alternative_input_2(
- "1:"XSAVE,
- XSAVEOPT,
- X86_FEATURE_XSAVEOPT,
- XSAVES,
- X86_FEATURE_XSAVES,
- [xstate] "D" (xstate), "a" (lmask), "d" (hmask) :
- "memory");
- asm volatile("2:\n\t"
- xstate_fault(err)
- : "0" (err)
- : "memory");
+ XSTATE_XSAVE(xstate, lmask, hmask, err);
/* We should never fault when copying to a kernel buffer: */
WARN_ON_FPU(err);
@@ -344,23 +355,9 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err = 0;
+ int err;
- /*
- * Use xrstors to restore context if it is enabled. xrstors supports
- * compacted format of xsave area which is not supported by xrstor.
- */
- alternative_input(
- "1: " XRSTOR,
- XRSTORS,
- X86_FEATURE_XSAVES,
- "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask)
- : "memory");
-
- asm volatile("2:\n"
- xstate_fault(err)
- : "0" (err)
- : "memory");
+ XSTATE_XRESTORE(xstate, lmask, hmask, err);
/* We should never fault when copying from a kernel buffer: */
WARN_ON_FPU(err);
@@ -388,12 +385,10 @@ static inline int copy_xregs_to_user(struct xregs_state __user *buf)
if (unlikely(err))
return -EFAULT;
- __asm__ __volatile__(ASM_STAC "\n"
- "1:"XSAVE"\n"
- "2: " ASM_CLAC "\n"
- xstate_fault(err)
- : "D" (buf), "a" (-1), "d" (-1), "0" (err)
- : "memory");
+ stac();
+ XSTATE_OP(XSAVE, buf, -1, -1, err);
+ clac();
+
return err;
}
@@ -405,14 +400,12 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
struct xregs_state *xstate = ((__force struct xregs_state *)buf);
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err = 0;
-
- __asm__ __volatile__(ASM_STAC "\n"
- "1:"XRSTOR"\n"
- "2: " ASM_CLAC "\n"
- xstate_fault(err)
- : "D" (xstate), "a" (lmask), "d" (hmask), "0" (err)
- : "memory"); /* memory required? */
+ int err;
+
+ stac();
+ XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
+ clac();
+
return err;
}
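From a caller's point of view, the new XSTATE_OP() error protocol looks as follows; a minimal sketch, with an illustrative wrapper name:

	static inline int xsave_to_kernel_sketch(struct xregs_state *st)
	{
		u32 lmask = -1, hmask = -1;
		int err;

		/*
		 * The "xor %[err], %[err]" in XSTATE_OP zeroes err on the
		 * success path; a fault at label 1 is routed through .fixup,
		 * which stores -2 in err and resumes after the instruction.
		 */
		XSTATE_OP(XSAVE, st, lmask, hmask, err);
		return err;	/* 0 on success, -2 on fault */
	}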
diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h
new file mode 100644
index 000000000000..e1a411786bf5
--- /dev/null
+++ b/arch/x86/include/asm/intel_pt.h
@@ -0,0 +1,10 @@
+#ifndef _ASM_X86_INTEL_PT_H
+#define _ASM_X86_INTEL_PT_H
+
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
+void cpu_emergency_stop_pt(void);
+#else
+static inline void cpu_emergency_stop_pt(void) {}
+#endif
+
+#endif /* _ASM_X86_INTEL_PT_H */
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 615fa9061b57..cfc9a0d2d07c 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -119,6 +119,8 @@ static inline void
native_apic_mem_write(APIC_ICR, cfg);
}
+extern void default_send_IPI_single(int cpu, int vector);
+extern void default_send_IPI_single_phys(int cpu, int vector);
extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
int vector);
extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 5daeca3d0f9e..adc54c12cbd1 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -1,12 +1,18 @@
#ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H
-#ifndef __ASSEMBLY__
-
-#include <linux/stringify.h>
-#include <linux/types.h>
-#include <asm/nops.h>
-#include <asm/asm.h>
+#ifndef HAVE_JUMP_LABEL
+/*
+ * For better or for worse, if jump labels (the gcc extension) are missing,
+ * then the entire static branch patching infrastructure is compiled out.
+ * If that happens, the code in here will malfunction. Raise a compiler
+ * error instead.
+ *
+ * In theory, jump labels and the static branch patching infrastructure
+ * could be decoupled to fix this.
+ */
+#error asm/jump_label.h included on a non-jump-label kernel
+#endif
#define JUMP_LABEL_NOP_SIZE 5
@@ -16,6 +22,14 @@
# define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC
#endif
+#include <asm/asm.h>
+#include <asm/nops.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:"
@@ -59,5 +73,40 @@ struct jump_entry {
jump_label_t key;
};
-#endif /* __ASSEMBLY__ */
+#else /* __ASSEMBLY__ */
+
+.macro STATIC_JUMP_IF_TRUE target, key, def
+.Lstatic_jump_\@:
+ .if \def
+ /* Equivalent to "jmp.d32 \target" */
+ .byte 0xe9
+ .long \target - .Lstatic_jump_after_\@
+.Lstatic_jump_after_\@:
+ .else
+ .byte STATIC_KEY_INIT_NOP
+ .endif
+ .pushsection __jump_table, "aw"
+ _ASM_ALIGN
+ _ASM_PTR .Lstatic_jump_\@, \target, \key
+ .popsection
+.endm
+
+.macro STATIC_JUMP_IF_FALSE target, key, def
+.Lstatic_jump_\@:
+ .if \def
+ .byte STATIC_KEY_INIT_NOP
+ .else
+ /* Equivalent to "jmp.d32 \target" */
+ .byte 0xe9
+ .long \target - .Lstatic_jump_after_\@
+.Lstatic_jump_after_\@:
+ .endif
+ .pushsection __jump_table, "aw"
+ _ASM_ALIGN
+ _ASM_PTR .Lstatic_jump_\@, \target, \key + 1
+ .popsection
+.endm
+
+#endif /* __ASSEMBLY__ */
+
#endif
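The new assembly macros mirror what arch_static_branch() emits on the C side: a 5-byte NOP (or jump) plus a __jump_table entry. A hedged C-side sketch of the same mechanism, with illustrative key and helper names:

	static DEFINE_STATIC_KEY_FALSE(my_key);	/* illustrative */

	void hot_path(void)
	{
		/* Compiles to a 5-byte NOP until my_key is enabled. */
		if (static_branch_unlikely(&my_key))
			slow_path();	/* illustrative slow path */
	}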
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 34e62b1dcfce..1e1b07a5a738 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -1,6 +1,7 @@
#ifndef _ASM_X86_MICROCODE_H
#define _ASM_X86_MICROCODE_H
+#include <asm/cpu.h>
#include <linux/earlycpio.h>
#define native_rdmsr(msr, val1, val2) \
@@ -95,14 +96,14 @@ static inline void __exit exit_amd_microcode(void) {}
/*
* In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
- * x86_vendor() gets vendor id for BSP.
+ * x86_cpuid_vendor() gets vendor id for BSP.
*
* In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
- * coding, we still use x86_vendor() to get vendor id for AP.
+ * coding, we still use x86_cpuid_vendor() to get vendor id for AP.
*
- * x86_vendor() gets vendor information directly from CPUID.
+ * x86_cpuid_vendor() gets vendor information directly from CPUID.
*/
-static inline int x86_vendor(void)
+static inline int x86_cpuid_vendor(void)
{
u32 eax = 0x00000000;
u32 ebx, ecx = 0, edx;
@@ -118,40 +119,14 @@ static inline int x86_vendor(void)
return X86_VENDOR_UNKNOWN;
}
-static inline unsigned int __x86_family(unsigned int sig)
-{
- unsigned int x86;
-
- x86 = (sig >> 8) & 0xf;
-
- if (x86 == 0xf)
- x86 += (sig >> 20) & 0xff;
-
- return x86;
-}
-
-static inline unsigned int x86_family(void)
+static inline unsigned int x86_cpuid_family(void)
{
u32 eax = 0x00000001;
u32 ebx, ecx = 0, edx;
native_cpuid(&eax, &ebx, &ecx, &edx);
- return __x86_family(eax);
-}
-
-static inline unsigned int x86_model(unsigned int sig)
-{
- unsigned int x86, model;
-
- x86 = __x86_family(sig);
-
- model = (sig >> 4) & 0xf;
-
- if (x86 == 0x6 || x86 == 0xf)
- model += ((sig >> 16) & 0xf) << 4;
-
- return model;
+ return x86_family(eax);
}
#ifdef CONFIG_MICROCODE
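The helpers removed here move to a shared header unchanged, so their signature decoding is worth one worked example. For a CPUID signature of 0x000306c3:

	/*
	 * family   = (sig >> 8) & 0xf          = 6 (!= 0xf, so no
	 *            extended-family addend)
	 * model    = ((sig >> 4) & 0xf)        = 0xc, plus
	 *            ((sig >> 16) & 0xf) << 4  = 0x30 (family 6 extends it)
	 *                                     => model 0x3c
	 * stepping = sig & 0xf                 = 3
	 */
	unsigned int fam = x86_family(0x000306c3);	/* 6    */
	unsigned int mod = x86_model(0x000306c3);	/* 0x3c */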
diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h
index 93724cc62177..eb4b09b41df5 100644
--- a/arch/x86/include/asm/msi.h
+++ b/arch/x86/include/asm/msi.h
@@ -1,7 +1,13 @@
#ifndef _ASM_X86_MSI_H
#define _ASM_X86_MSI_H
#include <asm/hw_irq.h>
+#include <asm/irqdomain.h>
typedef struct irq_alloc_info msi_alloc_info_t;
+int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
+ msi_alloc_info_t *arg);
+
+void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc);
+
#endif /* _ASM_X86_MSI_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 9f3905697f12..b05402ef3b84 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -35,7 +35,7 @@
#define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
-#define MSR_NHM_PLATFORM_INFO 0x000000ce
+#define MSR_PLATFORM_INFO 0x000000ce
#define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
@@ -44,7 +44,6 @@
#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
-#define MSR_PLATFORM_INFO 0x000000ce
#define MSR_MTRRcap 0x000000fe
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
@@ -322,6 +321,7 @@
#define MSR_F15H_PERF_CTR 0xc0010201
#define MSR_F15H_NB_PERF_CTL 0xc0010240
#define MSR_F15H_NB_PERF_CTR 0xc0010241
+#define MSR_F15H_IC_CFG 0xc0011021
/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
diff --git a/arch/x86/include/asm/msr-trace.h b/arch/x86/include/asm/msr-trace.h
new file mode 100644
index 000000000000..7567225747d8
--- /dev/null
+++ b/arch/x86/include/asm/msr-trace.h
@@ -0,0 +1,57 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM msr
+
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE msr-trace
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH asm/
+
+#if !defined(_TRACE_MSR_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MSR_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * Tracing for x86 model specific registers. Directly maps to the
+ * RDMSR/WRMSR instructions.
+ */
+
+DECLARE_EVENT_CLASS(msr_trace_class,
+ TP_PROTO(unsigned msr, u64 val, int failed),
+ TP_ARGS(msr, val, failed),
+ TP_STRUCT__entry(
+ __field( unsigned, msr )
+ __field( u64, val )
+ __field( int, failed )
+ ),
+ TP_fast_assign(
+ __entry->msr = msr;
+ __entry->val = val;
+ __entry->failed = failed;
+ ),
+ TP_printk("%x, value %llx%s",
+ __entry->msr,
+ __entry->val,
+ __entry->failed ? " #GP" : "")
+);
+
+DEFINE_EVENT(msr_trace_class, read_msr,
+ TP_PROTO(unsigned msr, u64 val, int failed),
+ TP_ARGS(msr, val, failed)
+);
+
+DEFINE_EVENT(msr_trace_class, write_msr,
+ TP_PROTO(unsigned msr, u64 val, int failed),
+ TP_ARGS(msr, val, failed)
+);
+
+DEFINE_EVENT(msr_trace_class, rdpmc,
+ TP_PROTO(unsigned msr, u64 val, int failed),
+ TP_ARGS(msr, val, failed)
+);
+
+#endif /* _TRACE_MSR_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
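Once the tracepoints are wired up, the events appear under the standard tracefs hierarchy. A hedged usage sketch, assuming the usual debugfs mount point:

	# enable MSR write tracing and stream the resulting events
	echo 1 > /sys/kernel/debug/tracing/events/msr/write_msr/enable
	cat /sys/kernel/debug/tracing/trace_pipe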
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 77d8b284e4a7..93fb7c1cffda 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -32,6 +32,16 @@ struct msr_regs_info {
int err;
};
+struct saved_msr {
+ bool valid;
+ struct msr_info info;
+};
+
+struct saved_msrs {
+ unsigned int num;
+ struct saved_msr *array;
+};
+
static inline unsigned long long native_read_tscp(unsigned int *aux)
{
unsigned long low, high;
@@ -57,11 +67,34 @@ static inline unsigned long long native_read_tscp(unsigned int *aux)
#define EAX_EDX_RET(val, low, high) "=A" (val)
#endif
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * Be very careful with includes. This header is prone to include loops.
+ */
+#include <asm/atomic.h>
+#include <linux/tracepoint-defs.h>
+
+extern struct tracepoint __tracepoint_read_msr;
+extern struct tracepoint __tracepoint_write_msr;
+extern struct tracepoint __tracepoint_rdpmc;
+#define msr_tracepoint_active(t) static_key_false(&(t).key)
+extern void do_trace_write_msr(unsigned msr, u64 val, int failed);
+extern void do_trace_read_msr(unsigned msr, u64 val, int failed);
+extern void do_trace_rdpmc(unsigned msr, u64 val, int failed);
+#else
+#define msr_tracepoint_active(t) false
+static inline void do_trace_write_msr(unsigned msr, u64 val, int failed) {}
+static inline void do_trace_read_msr(unsigned msr, u64 val, int failed) {}
+static inline void do_trace_rdpmc(unsigned msr, u64 val, int failed) {}
+#endif
+
static inline unsigned long long native_read_msr(unsigned int msr)
{
DECLARE_ARGS(val, low, high);
asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
+ if (msr_tracepoint_active(__tracepoint_read_msr))
+ do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0);
return EAX_EDX_VAL(val, low, high);
}
@@ -78,6 +111,8 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
_ASM_EXTABLE(2b, 3b)
: [err] "=r" (*err), EAX_EDX_RET(val, low, high)
: "c" (msr), [fault] "i" (-EIO));
+ if (msr_tracepoint_active(__tracepoint_read_msr))
+ do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err);
return EAX_EDX_VAL(val, low, high);
}
@@ -85,6 +120,8 @@ static inline void native_write_msr(unsigned int msr,
unsigned low, unsigned high)
{
asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
+ if (msr_tracepoint_active(__tracepoint_write_msr))
+ do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
}
/* Can be uninlined because referenced by paravirt */
@@ -102,6 +139,8 @@ notrace static inline int native_write_msr_safe(unsigned int msr,
: "c" (msr), "0" (low), "d" (high),
[fault] "i" (-EIO)
: "memory");
+ if (msr_tracepoint_active(__tracepoint_write_msr))
+ do_trace_write_msr(msr, ((u64)high << 32 | low), err);
return err;
}
@@ -160,6 +199,8 @@ static inline unsigned long long native_read_pmc(int counter)
DECLARE_ARGS(val, low, high);
asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter));
+ if (msr_tracepoint_active(__tracepoint_rdpmc))
+ do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0);
return EAX_EDX_VAL(val, low, high);
}
@@ -190,7 +231,7 @@ static inline void wrmsr(unsigned msr, unsigned low, unsigned high)
static inline void wrmsrl(unsigned msr, u64 val)
{
- native_write_msr(msr, (u32)val, (u32)(val >> 32));
+ native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
}
/* wrmsr with exception handling */
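The wrmsrl() tweak is cosmetic, since the cast to u32 already truncates, and the split it spells out is easy to verify by hand:

	u64 val  = 0x123456789abcdef0ULL;
	u32 low  = (u32)(val & 0xffffffffULL);	/* 0x9abcdef0 -> %eax */
	u32 high = (u32)(val >> 32);		/* 0x12345678 -> %edx */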
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index c5b7fb2774d0..7bd0099384ca 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -5,23 +5,25 @@
#include <linux/types.h>
/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
+#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
+
+#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
#define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1))
#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
-/* Cast PAGE_MASK to a signed type so that it is sign-extended if
+/* Cast *PAGE_MASK to a signed type so that it is sign-extended if
virtual addresses are 32-bits but physical addresses are larger
(ie, 32-bit PAE). */
#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK)
-
-#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
-#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
-
-#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
-#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
+#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK)
+#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK)
#define HPAGE_SHIFT PMD_SHIFT
#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
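A worked example of the new physical masks, assuming the usual x86-64 constants (PMD_SHIFT = 21, __PHYSICAL_MASK_SHIFT = 46):

	/*
	 * PMD_PAGE_SIZE          = 1UL << 21       = 0x200000 (2 MiB)
	 * PMD_PAGE_MASK          = ~(0x200000 - 1) = 0xffffffffffe00000
	 * PHYSICAL_PMD_PAGE_MASK = PMD_PAGE_MASK & ((1ULL << 46) - 1)
	 *                        = 0x00003fffffe00000
	 */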
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 10d0596433f8..f6192502149e 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -19,6 +19,12 @@ static inline int paravirt_enabled(void)
return pv_info.paravirt_enabled;
}
+static inline int paravirt_has_feature(unsigned int feature)
+{
+ WARN_ON_ONCE(!pv_info.paravirt_enabled);
+ return (pv_info.features & feature);
+}
+
static inline void load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
{
@@ -285,15 +291,6 @@ static inline void slow_down_io(void)
#endif
}
-#ifdef CONFIG_SMP
-static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
- unsigned long start_esp)
-{
- PVOP_VCALL3(pv_apic_ops.startup_ipi_hook,
- phys_apicid, start_eip, start_esp);
-}
-#endif
-
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
@@ -375,23 +372,6 @@ static inline void pte_update(struct mm_struct *mm, unsigned long addr,
{
PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
}
-static inline void pmd_update(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp)
-{
- PVOP_VCALL3(pv_mmu_ops.pmd_update, mm, addr, pmdp);
-}
-
-static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
- PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
-}
-
-static inline void pmd_update_defer(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp)
-{
- PVOP_VCALL3(pv_mmu_ops.pmd_update_defer, mm, addr, pmdp);
-}
static inline pte_t __pte(pteval_t val)
{
@@ -922,23 +902,11 @@ extern void default_banner(void);
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-#define USERGS_SYSRET32 \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \
- CLBR_NONE, \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
-
#ifdef CONFIG_X86_32
#define GET_CR0_INTO_EAX \
push %ecx; push %edx; \
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
pop %edx; pop %ecx
-
-#define ENABLE_INTERRUPTS_SYSEXIT \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
- CLBR_NONE, \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
-
-
#else /* !CONFIG_X86_32 */
/*
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 31247b5bff7c..77db5616a473 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -70,9 +70,14 @@ struct pv_info {
#endif
int paravirt_enabled;
+ unsigned int features; /* valid only if paravirt_enabled is set */
const char *name;
};
+#define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x)
+/* Supported features */
+#define PV_SUPPORTED_RTC (1<<0)
+
struct pv_init_ops {
/*
* Patch may replace one of the defined code sequences with
@@ -157,15 +162,6 @@ struct pv_cpu_ops {
u64 (*read_pmc)(int counter);
-#ifdef CONFIG_X86_32
- /*
- * Atomically enable interrupts and return to userspace. This
- * is only used in 32-bit kernels. 64-bit kernels use
- * usergs_sysret32 instead.
- */
- void (*irq_enable_sysexit)(void);
-#endif
-
/*
* Switch to usermode gs and return to 64-bit usermode using
* sysret. Only used in 64-bit kernels to return to 64-bit
@@ -174,14 +170,6 @@ struct pv_cpu_ops {
*/
void (*usergs_sysret64)(void);
- /*
- * Switch to usermode gs and return to 32-bit usermode using
- * sysret. Used to return to 32-on-64 compat processes.
- * Other usermode register state, including %esp, must already
- * be restored.
- */
- void (*usergs_sysret32)(void);
-
/* Normal iret. Jump to this with the standard iret stack
frame set up. */
void (*iret)(void);
@@ -215,14 +203,6 @@ struct pv_irq_ops {
#endif
};
-struct pv_apic_ops {
-#ifdef CONFIG_X86_LOCAL_APIC
- void (*startup_ipi_hook)(int phys_apicid,
- unsigned long start_eip,
- unsigned long start_esp);
-#endif
-};
-
struct pv_mmu_ops {
unsigned long (*read_cr2)(void);
void (*write_cr2)(unsigned long);
@@ -274,12 +254,6 @@ struct pv_mmu_ops {
pmd_t *pmdp, pmd_t pmdval);
void (*pte_update)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);
- void (*pte_update_defer)(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep);
- void (*pmd_update)(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp);
- void (*pmd_update_defer)(struct mm_struct *mm,
- unsigned long addr, pmd_t *pmdp);
pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);
@@ -354,7 +328,6 @@ struct paravirt_patch_template {
struct pv_time_ops pv_time_ops;
struct pv_cpu_ops pv_cpu_ops;
struct pv_irq_ops pv_irq_ops;
- struct pv_apic_ops pv_apic_ops;
struct pv_mmu_ops pv_mmu_ops;
struct pv_lock_ops pv_lock_ops;
};
@@ -364,7 +337,6 @@ extern struct pv_init_ops pv_init_ops;
extern struct pv_time_ops pv_time_ops;
extern struct pv_cpu_ops pv_cpu_ops;
extern struct pv_irq_ops pv_irq_ops;
-extern struct pv_apic_ops pv_apic_ops;
extern struct pv_mmu_ops pv_mmu_ops;
extern struct pv_lock_ops pv_lock_ops;
@@ -402,10 +374,8 @@ extern struct pv_lock_ops pv_lock_ops;
__visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \
asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name))
-unsigned paravirt_patch_nop(void);
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
-unsigned paravirt_patch_ignore(unsigned len);
unsigned paravirt_patch_call(void *insnbuf,
const void *target, u16 tgt_clobbers,
unsigned long addr, u16 site_clobbers,
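A hedged sketch of how a driver consults the new feature word; RTC is the only flag defined so far, and the probe snippet is illustrative:

	/* Skip the platform RTC when the hypervisor reports none. */
	if (paravirt_enabled() && !paravirt_has(RTC))
		return -ENODEV;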
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 6ec0c8b2e9df..d3eee663c41f 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -69,9 +69,6 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
#define pmd_clear(pmd) native_pmd_clear(pmd)
#define pte_update(mm, addr, ptep) do { } while (0)
-#define pte_update_defer(mm, addr, ptep) do { } while (0)
-#define pmd_update(mm, addr, ptep) do { } while (0)
-#define pmd_update_defer(mm, addr, ptep) do { } while (0)
#define pgd_val(x) native_pgd_val(x)
#define __pgd(x) native_make_pgd(x)
@@ -731,14 +728,9 @@ static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
* updates should either be sets, clears, or set_pte_atomic for P->P
* transitions, which means this hook should only be called for user PTEs.
* This hook implies a P->P protection or access change has taken place, which
- * requires a subsequent TLB flush. The notification can optionally be delayed
- * until the TLB flush event by using the pte_update_defer form of the
- * interface, but care must be taken to assure that the flush happens while
- * still holding the same page table lock so that the shadow and primary pages
- * do not become out of sync on SMP.
+ * requires a subsequent TLB flush.
*/
#define pte_update(mm, addr, ptep) do { } while (0)
-#define pte_update_defer(mm, addr, ptep) do { } while (0)
#endif
/*
@@ -830,9 +822,7 @@ static inline int pmd_write(pmd_t pmd)
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp)
{
- pmd_t pmd = native_pmdp_get_and_clear(pmdp);
- pmd_update(mm, addr, pmdp);
- return pmd;
+ return native_pmdp_get_and_clear(pmdp);
}
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
@@ -840,7 +830,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
- pmd_update(mm, addr, pmdp);
}
/*
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index dd5b0aa9dd2f..a471cadb9630 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -279,17 +279,14 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
static inline pudval_t pud_pfn_mask(pud_t pud)
{
if (native_pud_val(pud) & _PAGE_PSE)
- return PUD_PAGE_MASK & PHYSICAL_PAGE_MASK;
+ return PHYSICAL_PUD_PAGE_MASK;
else
return PTE_PFN_MASK;
}
static inline pudval_t pud_flags_mask(pud_t pud)
{
- if (native_pud_val(pud) & _PAGE_PSE)
- return ~(PUD_PAGE_MASK & (pudval_t)PHYSICAL_PAGE_MASK);
- else
- return ~PTE_PFN_MASK;
+ return ~pud_pfn_mask(pud);
}
static inline pudval_t pud_flags(pud_t pud)
@@ -300,17 +297,14 @@ static inline pudval_t pud_flags(pud_t pud)
static inline pmdval_t pmd_pfn_mask(pmd_t pmd)
{
if (native_pmd_val(pmd) & _PAGE_PSE)
- return PMD_PAGE_MASK & PHYSICAL_PAGE_MASK;
+ return PHYSICAL_PMD_PAGE_MASK;
else
return PTE_PFN_MASK;
}
static inline pmdval_t pmd_flags_mask(pmd_t pmd)
{
- if (native_pmd_val(pmd) & _PAGE_PSE)
- return ~(PMD_PAGE_MASK & (pmdval_t)PHYSICAL_PAGE_MASK);
- else
- return ~PTE_PFN_MASK;
+ return ~pmd_pfn_mask(pmd);
}
static inline pmdval_t pmd_flags(pmd_t pmd)
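The simplification holds because the two masks are exact complements, so they partition every bit of the entry. A brief illustration:

	pmdval_t val   = native_pmd_val(pmd);
	pmdval_t pfn   = val & pmd_pfn_mask(pmd);
	pmdval_t flags = val & pmd_flags_mask(pmd);

	/* By construction: (pfn | flags) == val and (pfn & flags) == 0. */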
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 67522256c7ff..2d5a50cb61a2 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -472,6 +472,7 @@ static inline unsigned long current_top_of_stack(void)
#else
#define __cpuid native_cpuid
#define paravirt_enabled() 0
+#define paravirt_has(x) 0
static inline void load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 7a6bed5c08bc..fdcc04020636 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -4,6 +4,15 @@
#include <linux/clocksource.h>
#include <asm/pvclock-abi.h>
+#ifdef CONFIG_KVM_GUEST
+extern struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void);
+#else
+static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
+{
+ return NULL;
+}
+#endif
+
/* some helper functions for xen and kvm pv clock sources */
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
@@ -91,10 +100,5 @@ struct pvclock_vsyscall_time_info {
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
-#define PVCLOCK_VSYSCALL_NR_PAGES (((NR_CPUS-1)/(PAGE_SIZE/PVTI_SIZE))+1)
-
-int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
- int size);
-struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu);
#endif /* _ASM_X86_PVCLOCK_H */
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index b002e711ba88..9f92c180ed2f 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -1,6 +1,65 @@
#ifndef __ASM_QSPINLOCK_PARAVIRT_H
#define __ASM_QSPINLOCK_PARAVIRT_H
+/*
+ * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit
+ * registers. For i386, however, only 1 32-bit register needs to be saved
+ * and restored. So an optimized version of __pv_queued_spin_unlock() is
+ * hand-coded for 64-bit, but it isn't worthwhile to do it for 32-bit.
+ */
+#ifdef CONFIG_64BIT
+
+PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
+#define __pv_queued_spin_unlock __pv_queued_spin_unlock
+#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock"
+#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath"
+
+/*
+ * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock
+ * which combines the register-saving thunk and the body of the following
+ * C code:
+ *
+ * void __pv_queued_spin_unlock(struct qspinlock *lock)
+ * {
+ * struct __qspinlock *l = (void *)lock;
+ * u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+ *
+ * if (likely(lockval == _Q_LOCKED_VAL))
+ * return;
+ * pv_queued_spin_unlock_slowpath(lock, lockval);
+ * }
+ *
+ * For x86-64,
+ * rdi = lock (first argument)
+ * rsi = lockval (second argument)
+ * rdx = internal variable (set to 0)
+ */
+asm (".pushsection .text;"
+ ".globl " PV_UNLOCK ";"
+ ".align 4,0x90;"
+ PV_UNLOCK ": "
+ "push %rdx;"
+ "mov $0x1,%eax;"
+ "xor %edx,%edx;"
+ "lock cmpxchg %dl,(%rdi);"
+ "cmp $0x1,%al;"
+ "jne .slowpath;"
+ "pop %rdx;"
+ "ret;"
+ ".slowpath: "
+ "push %rsi;"
+ "movzbl %al,%esi;"
+ "call " PV_UNLOCK_SLOWPATH ";"
+ "pop %rsi;"
+ "pop %rdx;"
+ "ret;"
+ ".size " PV_UNLOCK ", .-" PV_UNLOCK ";"
+ ".popsection");
+
+#else /* CONFIG_64BIT */
+
+extern void __pv_queued_spin_unlock(struct qspinlock *lock);
PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
+#endif /* CONFIG_64BIT */
#endif
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index a82c4f1b4d83..2cb1cc253d51 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,5 +25,6 @@ void __noreturn machine_real_restart(unsigned int type);
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
+void run_crash_ipi_callback(struct pt_regs *regs);
#endif /* _ASM_X86_REBOOT_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 222a6a3ca2b5..dfcf0727623b 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -21,15 +21,6 @@
extern int smp_num_siblings;
extern unsigned int num_processors;
-static inline bool cpu_has_ht_siblings(void)
-{
- bool has_siblings = false;
-#ifdef CONFIG_SMP
- has_siblings = cpu_has_ht && smp_num_siblings > 1;
-#endif
- return has_siblings;
-}
-
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
/* cpus sharing the last level cache: */
@@ -74,9 +65,6 @@ struct smp_ops {
extern void set_cpu_sibling_map(int cpu);
#ifdef CONFIG_SMP
-#ifndef CONFIG_PARAVIRT
-#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0)
-#endif
extern struct smp_ops smp_ops;
static inline void smp_send_stop(void)
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index d1793f06854d..8e9dbe7b73a1 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -15,6 +15,7 @@ struct saved_context {
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
bool misc_enable_saved;
+ struct saved_msrs saved_msrs;
struct desc_ptr gdt_desc;
struct desc_ptr idt;
u16 ldt;
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 7ebf0ebe4e68..6136a18152af 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -24,6 +24,7 @@ struct saved_context {
unsigned long cr0, cr2, cr3, cr4, cr8;
u64 misc_enable;
bool misc_enable_saved;
+ struct saved_msrs saved_msrs;
unsigned long efer;
u16 gdt_pad; /* Unused */
struct desc_ptr gdt_desc;
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 09b1b0ab94b7..660458af425d 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -745,5 +745,14 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
#undef __copy_from_user_overflow
#undef __copy_to_user_overflow
+/*
+ * We rely on the nested NMI work to allow atomic faults from the NMI path; the
+ * nested NMI paths are careful to preserve CR2.
+ *
+ * Caller must use pagefault_enable/disable, or run in interrupt context,
+ * and also do an access_ok() check.
+ */
+#define __copy_from_user_nmi __copy_from_user_inatomic
+
#endif /* _ASM_X86_UACCESS_H */
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 756de9190aec..deabaf9759b6 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -22,6 +22,7 @@ struct vdso_image {
long sym_vvar_page;
long sym_hpet_page;
+ long sym_pvclock_page;
long sym_VDSO32_NOTE_MASK;
long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 48d34d28f5a6..1ae89a2721d6 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -1,7 +1,6 @@
#ifndef _ASM_X86_PLATFORM_H
#define _ASM_X86_PLATFORM_H
-#include <asm/pgtable_types.h>
#include <asm/bootparam.h>
struct mpc_bus;
@@ -83,13 +82,11 @@ struct x86_init_paging {
* struct x86_init_timers - platform specific timer setup
* @setup_perpcu_clockev: set up the per cpu clock event device for the
* boot cpu
- * @tsc_pre_init: platform function called before TSC init
* @timer_init: initialize the platform timer (default PIT/HPET)
* @wallclock_init: init the wallclock device
*/
struct x86_init_timers {
void (*setup_percpu_clockev)(void);
- void (*tsc_pre_init)(void);
void (*timer_init)(void);
void (*wallclock_init)(void);
};
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 5a08bc8bff33..c54beb44c4c1 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -553,7 +553,7 @@ do { \
if (cpu_has_xmm) { \
xor_speed(&xor_block_pIII_sse); \
xor_speed(&xor_block_sse_pf64); \
- } else if (cpu_has_mmx) { \
+ } else if (boot_cpu_has(X86_FEATURE_MMX)) { \
xor_speed(&xor_block_pII_mmx); \
xor_speed(&xor_block_p5_mmx); \
} else { \
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 03429da2fa80..2184943341bf 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -16,7 +16,7 @@ struct mce {
__u8 cpuvendor; /* cpu vendor as encoded in system.h */
__u8 inject_flags; /* software inject flags */
__u8 severity;
- __u8 usable_addr;
+ __u8 pad;
__u32 cpuid; /* CPUID 1 EAX */
__u8 cs; /* code segment */
__u8 bank; /* machine check bank */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2f69e3b184f6..8a5cddac7d44 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -82,6 +82,12 @@ physid_mask_t phys_cpu_present_map;
static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID;
/*
+ * This variable controls which CPUs receive external NMIs. By default,
+ * external NMIs are delivered only to the BSP.
+ */
+static int apic_extnmi = APIC_EXTNMI_BSP;
+
+/*
* Map cpu index to physical APIC ID
*/
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
@@ -1161,6 +1167,8 @@ void __init init_bsp_APIC(void)
value = APIC_DM_NMI;
if (!lapic_is_integrated()) /* 82489DX */
value |= APIC_LVT_LEVEL_TRIGGER;
+ if (apic_extnmi == APIC_EXTNMI_NONE)
+ value |= APIC_LVT_MASKED;
apic_write(APIC_LVT1, value);
}
@@ -1378,9 +1386,11 @@ void setup_local_APIC(void)
apic_write(APIC_LVT0, value);
/*
- * only the BP should see the LINT1 NMI signal, obviously.
+ * Only the BSP sees the LINT1 NMI signal by default. This can be
+ * modified by the apic_extnmi= boot option.
*/
- if (!cpu)
+ if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) ||
+ apic_extnmi == APIC_EXTNMI_ALL)
value = APIC_DM_NMI;
else
value = APIC_DM_NMI | APIC_LVT_MASKED;
@@ -2270,6 +2280,7 @@ static struct {
unsigned int apic_tmict;
unsigned int apic_tdcr;
unsigned int apic_thmr;
+ unsigned int apic_cmci;
} apic_pm_state;
static int lapic_suspend(void)
@@ -2299,6 +2310,10 @@ static int lapic_suspend(void)
if (maxlvt >= 5)
apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
#endif
+#ifdef CONFIG_X86_MCE_INTEL
+ if (maxlvt >= 6)
+ apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI);
+#endif
local_irq_save(flags);
disable_local_APIC();
@@ -2355,10 +2370,14 @@ static void lapic_resume(void)
apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-#if defined(CONFIG_X86_MCE_INTEL)
+#ifdef CONFIG_X86_THERMAL_VECTOR
if (maxlvt >= 5)
apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
#endif
+#ifdef CONFIG_X86_MCE_INTEL
+ if (maxlvt >= 6)
+ apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci);
+#endif
if (maxlvt >= 4)
apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
@@ -2548,3 +2567,23 @@ static int __init apic_set_disabled_cpu_apicid(char *arg)
return 0;
}
early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid);
+
+static int __init apic_set_extnmi(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ if (!strncmp("all", arg, 3))
+ apic_extnmi = APIC_EXTNMI_ALL;
+ else if (!strncmp("none", arg, 4))
+ apic_extnmi = APIC_EXTNMI_NONE;
+ else if (!strncmp("bsp", arg, 3))
+ apic_extnmi = APIC_EXTNMI_BSP;
+ else {
+ pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+early_param("apic_extnmi", apic_set_extnmi);
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index f92ab36979a2..9968f30cca3e 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -185,6 +185,7 @@ static struct apic apic_flat = {
.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
+ .send_IPI = default_send_IPI_single,
.send_IPI_mask = flat_send_IPI_mask,
.send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
.send_IPI_allbutself = flat_send_IPI_allbutself,
@@ -230,17 +231,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 0;
}
-static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
-{
- default_send_IPI_mask_sequence_phys(cpumask, vector);
-}
-
-static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
- int vector)
-{
- default_send_IPI_mask_allbutself_phys(cpumask, vector);
-}
-
static void physflat_send_IPI_allbutself(int vector)
{
default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
@@ -248,7 +238,7 @@ static void physflat_send_IPI_allbutself(int vector)
static void physflat_send_IPI_all(int vector)
{
- physflat_send_IPI_mask(cpu_online_mask, vector);
+ default_send_IPI_mask_sequence_phys(cpu_online_mask, vector);
}
static int physflat_probe(void)
@@ -292,8 +282,9 @@ static struct apic apic_physflat = {
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
- .send_IPI_mask = physflat_send_IPI_mask,
- .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
+ .send_IPI = default_send_IPI_single_phys,
+ .send_IPI_mask = default_send_IPI_mask_sequence_phys,
+ .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_phys,
.send_IPI_allbutself = physflat_send_IPI_allbutself,
.send_IPI_all = physflat_send_IPI_all,
.send_IPI_self = apic_send_IPI_self,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 0d96749cfcac..331a7a07c48f 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -30,6 +30,7 @@
#include <asm/e820.h>
static void noop_init_apic_ldr(void) { }
+static void noop_send_IPI(int cpu, int vector) { }
static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { }
static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { }
static void noop_send_IPI_allbutself(int vector) { }
@@ -144,6 +145,7 @@ struct apic apic_noop = {
.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
+ .send_IPI = noop_send_IPI,
.send_IPI_mask = noop_send_IPI_mask,
.send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself,
.send_IPI_allbutself = noop_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 38dd5efdd04c..c80c02c6ec49 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -193,20 +193,17 @@ static int __init numachip_system_init(void)
case 1:
init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
numachip_apic_icr_write = numachip1_apic_icr_write;
- x86_init.pci.arch_init = pci_numachip_init;
break;
case 2:
init_extra_mapping_uc(NUMACHIP2_LCSR_BASE, NUMACHIP2_LCSR_SIZE);
numachip_apic_icr_write = numachip2_apic_icr_write;
-
- /* Use MCFG config cycles rather than locked CF8 cycles */
- raw_pci_ops = &pci_mmcfg;
break;
default:
return 0;
}
x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+ x86_init.pci.arch_init = pci_numachip_init;
return 0;
}
@@ -276,6 +273,7 @@ static const struct apic apic_numachip1 __refconst = {
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .send_IPI = numachip_send_IPI_one,
.send_IPI_mask = numachip_send_IPI_mask,
.send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself,
.send_IPI_allbutself = numachip_send_IPI_allbutself,
@@ -327,6 +325,7 @@ static const struct apic apic_numachip2 __refconst = {
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .send_IPI = numachip_send_IPI_one,
.send_IPI_mask = numachip_send_IPI_mask,
.send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself,
.send_IPI_allbutself = numachip_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 971cf8875939..cf9bd896c12d 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -96,11 +96,6 @@ static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
return cpuid_apic >> index_msb;
}
-static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector)
-{
- default_send_IPI_mask_sequence_phys(mask, vector);
-}
-
static void bigsmp_send_IPI_allbutself(int vector)
{
default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
@@ -108,7 +103,7 @@ static void bigsmp_send_IPI_allbutself(int vector)
static void bigsmp_send_IPI_all(int vector)
{
- bigsmp_send_IPI_mask(cpu_online_mask, vector);
+ default_send_IPI_mask_sequence_phys(cpu_online_mask, vector);
}
static int dmi_bigsmp; /* can be set by dmi scanners */
@@ -180,7 +175,8 @@ static struct apic apic_bigsmp = {
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
- .send_IPI_mask = bigsmp_send_IPI_mask,
+ .send_IPI = default_send_IPI_single_phys,
+ .send_IPI_mask = default_send_IPI_mask_sequence_phys,
.send_IPI_mask_allbutself = NULL,
.send_IPI_allbutself = bigsmp_send_IPI_allbutself,
.send_IPI_all = bigsmp_send_IPI_all,
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 62071569bd50..eb45fc9b6124 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -18,6 +18,16 @@
#include <asm/proto.h>
#include <asm/ipi.h>
+void default_send_IPI_single_phys(int cpu, int vector)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, cpu),
+ vector, APIC_DEST_PHYSICAL);
+ local_irq_restore(flags);
+}
+
void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
{
unsigned long query_cpu;
@@ -55,6 +65,14 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
local_irq_restore(flags);
}
+/*
+ * Helper function for APICs which insist on cpumasks
+ */
+void default_send_IPI_single(int cpu, int vector)
+{
+ apic->send_IPI_mask(cpumask_of(cpu), vector);
+}
+
#ifdef CONFIG_X86_32
void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
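The payoff shows up at call sites: a sender targeting one CPU no longer has to materialize a cpumask. A hedged caller sketch, with an illustrative function name:

	static void send_reschedule_sketch(int cpu)
	{
		/* was: apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); */
		apic->send_IPI(cpu, RESCHEDULE_VECTOR);
	}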
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 5f1feb6854af..ade25320df96 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -96,8 +96,8 @@ static irq_hw_number_t pci_msi_get_hwirq(struct msi_domain_info *info,
return arg->msi_hwirq;
}
-static int pci_msi_prepare(struct irq_domain *domain, struct device *dev,
- int nvec, msi_alloc_info_t *arg)
+int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
+ msi_alloc_info_t *arg)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct msi_desc *desc = first_pci_msi_entry(pdev);
@@ -113,11 +113,13 @@ static int pci_msi_prepare(struct irq_domain *domain, struct device *dev,
return 0;
}
+EXPORT_SYMBOL_GPL(pci_msi_prepare);
-static void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
+void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
{
arg->msi_hwirq = pci_msi_domain_calc_hwirq(arg->msi_dev, desc);
}
+EXPORT_SYMBOL_GPL(pci_msi_set_desc);
static struct msi_domain_ops pci_msi_domain_ops = {
.get_hwirq = pci_msi_get_hwirq,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 7694ae6c1199..f316e34abb42 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -105,6 +105,7 @@ static struct apic apic_default = {
.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
+ .send_IPI = default_send_IPI_single,
.send_IPI_mask = default_send_IPI_mask_logical,
.send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical,
.send_IPI_allbutself = default_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 861bc59c8f25..908cb37da171 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -29,6 +29,7 @@ struct apic_chip_data {
};
struct irq_domain *x86_vector_domain;
+EXPORT_SYMBOL_GPL(x86_vector_domain);
static DEFINE_RAW_SPINLOCK(vector_lock);
static cpumask_var_t vector_cpumask;
static struct irq_chip lapic_controller;
@@ -66,6 +67,7 @@ struct irq_cfg *irqd_cfg(struct irq_data *irq_data)
return data ? &data->cfg : NULL;
}
+EXPORT_SYMBOL_GPL(irqd_cfg);
struct irq_cfg *irq_cfg(unsigned int irq)
{
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index cc8311c4d298..aca8b75c1552 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -23,6 +23,14 @@ static inline u32 x2apic_cluster(int cpu)
return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16;
}
+static void x2apic_send_IPI(int cpu, int vector)
+{
+ u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
+
+ x2apic_wrmsr_fence();
+ __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
+}
+
static void
__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
{
@@ -266,6 +274,7 @@ static struct apic apic_x2apic_cluster = {
.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
+ .send_IPI = x2apic_send_IPI,
.send_IPI_mask = x2apic_send_IPI_mask,
.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 662e9150ea6f..a1242e2c12e6 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -36,6 +36,14 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys());
}
+static void x2apic_send_IPI(int cpu, int vector)
+{
+ u32 dest = per_cpu(x86_cpu_to_apicid, cpu);
+
+ x2apic_wrmsr_fence();
+ __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL);
+}
+
static void
__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
{
@@ -122,6 +130,7 @@ static struct apic apic_x2apic_phys = {
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .send_IPI = x2apic_send_IPI,
.send_IPI_mask = x2apic_send_IPI_mask,
.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 4a139465f1d4..d760c6bb37b5 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -406,6 +406,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
.cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
+ .send_IPI = uv_send_IPI_one,
.send_IPI_mask = uv_send_IPI_mask,
.send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
.send_IPI_allbutself = uv_send_IPI_allbutself,
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 439df975bc7a..84a7524b202c 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -65,9 +65,6 @@ void common(void) {
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-#ifdef CONFIG_X86_32
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
-#endif
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
#endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index d8f42f902a0f..f2edafb5f24e 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -23,7 +23,6 @@ int main(void)
{
#ifdef CONFIG_PARAVIRT
OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
- OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
BLANK();
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a8816b325162..e678ddeed030 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -304,7 +304,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
int cpu = smp_processor_id();
/* get information required for multi-node processors */
- if (cpu_has_topoext) {
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
u32 eax, ebx, ecx, edx;
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
@@ -678,9 +678,9 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
* Disable it on the affected CPUs.
*/
if ((c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
- if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) {
+ if (!rdmsrl_safe(MSR_F15H_IC_CFG, &value) && !(value & 0x1E)) {
value |= 0x1E;
- wrmsrl_safe(0xc0011021, value);
+ wrmsrl_safe(MSR_F15H_IC_CFG, value);
}
}
}
@@ -922,7 +922,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
void set_dr_addr_mask(unsigned long mask, int dr)
{
- if (!cpu_has_bpext)
+ if (!boot_cpu_has(X86_FEATURE_BPEXT))
return;
switch (dr) {
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index d8fba5c15fbd..ae20be6e483c 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -43,7 +43,7 @@ static void init_c3(struct cpuinfo_x86 *c)
/* store Centaur Extended Feature Flags as
* word 5 of the CPU capability bit array
*/
- c->x86_capability[5] = cpuid_edx(0xC0000001);
+ c->x86_capability[CPUID_C000_0001_EDX] = cpuid_edx(0xC0000001);
}
#ifdef CONFIG_X86_32
/* Cyrix III family needs CX8 & PGE explicitly enabled. */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4ddd780aeac9..37830de8f60a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -273,10 +273,9 @@ __setup("nosmap", setup_disable_smap);
static __always_inline void setup_smap(struct cpuinfo_x86 *c)
{
- unsigned long eflags;
+ unsigned long eflags = native_save_fl();
/* This should have been cleared long ago */
- raw_local_save_flags(eflags);
BUG_ON(eflags & X86_EFLAGS_AC);
if (cpu_has(c, X86_FEATURE_SMAP)) {
@@ -582,14 +581,9 @@ void cpu_detect(struct cpuinfo_x86 *c)
u32 junk, tfms, cap0, misc;
cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
- c->x86 = (tfms >> 8) & 0xf;
- c->x86_model = (tfms >> 4) & 0xf;
- c->x86_mask = tfms & 0xf;
-
- if (c->x86 == 0xf)
- c->x86 += (tfms >> 20) & 0xff;
- if (c->x86 >= 0x6)
- c->x86_model += ((tfms >> 16) & 0xf) << 4;
+ c->x86 = x86_family(tfms);
+ c->x86_model = x86_model(tfms);
+ c->x86_mask = x86_stepping(tfms);
if (cap0 & (1<<19)) {
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
@@ -600,50 +594,47 @@ void cpu_detect(struct cpuinfo_x86 *c)
void get_cpu_cap(struct cpuinfo_x86 *c)
{
- u32 tfms, xlvl;
- u32 ebx;
+ u32 eax, ebx, ecx, edx;
/* Intel-defined flags: level 0x00000001 */
if (c->cpuid_level >= 0x00000001) {
- u32 capability, excap;
+ cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
- cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
- c->x86_capability[0] = capability;
- c->x86_capability[4] = excap;
+ c->x86_capability[CPUID_1_ECX] = ecx;
+ c->x86_capability[CPUID_1_EDX] = edx;
}
/* Additional Intel-defined flags: level 0x00000007 */
if (c->cpuid_level >= 0x00000007) {
- u32 eax, ebx, ecx, edx;
-
cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
- c->x86_capability[9] = ebx;
+ c->x86_capability[CPUID_7_0_EBX] = ebx;
+
+ c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006);
}
/* Extended state features: level 0x0000000d */
if (c->cpuid_level >= 0x0000000d) {
- u32 eax, ebx, ecx, edx;
-
cpuid_count(0x0000000d, 1, &eax, &ebx, &ecx, &edx);
- c->x86_capability[10] = eax;
+ c->x86_capability[CPUID_D_1_EAX] = eax;
}
/* Additional Intel-defined flags: level 0x0000000F */
if (c->cpuid_level >= 0x0000000F) {
- u32 eax, ebx, ecx, edx;
/* QoS sub-leaf, EAX=0Fh, ECX=0 */
cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx);
- c->x86_capability[11] = edx;
+ c->x86_capability[CPUID_F_0_EDX] = edx;
+
if (cpu_has(c, X86_FEATURE_CQM_LLC)) {
/* will be overridden if occupancy monitoring exists */
c->x86_cache_max_rmid = ebx;
/* QoS sub-leaf, EAX=0Fh, ECX=1 */
cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
- c->x86_capability[12] = edx;
+ c->x86_capability[CPUID_F_1_EDX] = edx;
+
if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) {
c->x86_cache_max_rmid = ecx;
c->x86_cache_occ_scale = ebx;
@@ -655,22 +646,24 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
}
/* AMD-defined flags: level 0x80000001 */
- xlvl = cpuid_eax(0x80000000);
- c->extended_cpuid_level = xlvl;
+ eax = cpuid_eax(0x80000000);
+ c->extended_cpuid_level = eax;
- if ((xlvl & 0xffff0000) == 0x80000000) {
- if (xlvl >= 0x80000001) {
- c->x86_capability[1] = cpuid_edx(0x80000001);
- c->x86_capability[6] = cpuid_ecx(0x80000001);
+ if ((eax & 0xffff0000) == 0x80000000) {
+ if (eax >= 0x80000001) {
+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+
+ c->x86_capability[CPUID_8000_0001_ECX] = ecx;
+ c->x86_capability[CPUID_8000_0001_EDX] = edx;
}
}
if (c->extended_cpuid_level >= 0x80000008) {
- u32 eax = cpuid_eax(0x80000008);
+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
c->x86_virt_bits = (eax >> 8) & 0xff;
c->x86_phys_bits = eax & 0xff;
- c->x86_capability[13] = cpuid_ebx(0x80000008);
+ c->x86_capability[CPUID_8000_0008_EBX] = ebx;
}
#ifdef CONFIG_X86_32
else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
@@ -680,6 +673,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
if (c->extended_cpuid_level >= 0x80000007)
c->x86_power = cpuid_edx(0x80000007);
+ if (c->extended_cpuid_level >= 0x8000000a)
+ c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
+
init_scattered_cpuid_features(c);
}
@@ -1186,7 +1182,7 @@ void syscall_init(void)
* They both write to the same internal register. STAR allows to
* set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
*/
- wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
+ wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
#ifdef CONFIG_IA32_EMULATION
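
The wrmsrl()-to-wrmsr() change above re-expresses the same 64-bit MSR_STAR value as an explicit low/high dword pair. A small standalone check that the two encodings agree, using the conventional x86_64 selector values (the local macro names are stand-ins for __KERNEL_CS and __USER32_CS):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define KERNEL_CS	0x10u	/* GDT entry 2 * 8 */
#define USER32_CS	0x23u	/* GDT entry 4 * 8 + RPL 3 */

int main(void)
{
	uint64_t old = ((uint64_t)USER32_CS << 48) | ((uint64_t)KERNEL_CS << 32);
	uint32_t lo = 0, hi = (USER32_CS << 16) | KERNEL_CS;

	assert(old == (((uint64_t)hi << 32) | lo));	/* identical MSR image */
	printf("MSR_STAR high dword = %#x\n", hi);
	return 0;
}
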
@@ -1444,7 +1440,9 @@ void cpu_init(void)
printk(KERN_INFO "Initializing CPU#%d\n", cpu);
- if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || cpu_has_de)
+ if (cpu_feature_enabled(X86_FEATURE_VME) ||
+ cpu_has_tsc ||
+ boot_cpu_has(X86_FEATURE_DE))
cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
load_current_idt();
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 209ac1e7d1f0..565648bc1a0a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -445,7 +445,8 @@ static void init_intel(struct cpuinfo_x86 *c)
if (cpu_has_xmm2)
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
- if (cpu_has_ds) {
+
+ if (boot_cpu_has(X86_FEATURE_DS)) {
unsigned int l1;
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
if (!(l1 & (1<<11)))
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index e38d338a6447..0b6c52388cf4 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -591,7 +591,7 @@ cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
unsigned edx;
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
- if (cpu_has_topoext)
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT))
cpuid_count(0x8000001d, index, &eax.full,
&ebx.full, &ecx.full, &edx);
else
@@ -637,7 +637,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
- if (cpu_has_topoext) {
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
num_cache_leaves = find_num_cache_leaves(c);
} else if (c->extended_cpuid_level >= 0x80000006) {
if (cpuid_edx(0x80000006) & 0xf000)
@@ -809,7 +809,7 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
struct cacheinfo *this_leaf;
int i, sibling;
- if (cpu_has_topoext) {
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
unsigned int apicid, nshared, first, last;
this_leaf = this_cpu_ci->info_list + index;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index c5b0d562dbf5..a006f4cd792b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -114,7 +114,6 @@ static struct work_struct mce_work;
static struct irq_work mce_irq_work;
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
-static int mce_usable_address(struct mce *m);
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
@@ -475,6 +474,28 @@ static void mce_report_event(struct pt_regs *regs)
irq_work_queue(&mce_irq_work);
}
+/*
+ * Check if the address reported by the CPU is in a format we can parse.
+ * It would be possible to add code for most other cases, but all would
+ * be somewhat complicated (e.g. segment offset would require an instruction
+ * parser). So only support physical addresses up to page granularity for now.
+ */
+static int mce_usable_address(struct mce *m)
+{
+ if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
+ return 0;
+
+ /* Checks after this one are Intel-specific: */
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return 1;
+
+ if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
+ return 0;
+ if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
+ return 0;
+ return 1;
+}
+
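
For reference, the MCi_MISC fields tested by mce_usable_address() live in the low bits of the register: bits 5:0 hold the least significant valid address bit, bits 8:6 the address mode, and mode 2 denotes a physical address. A hedged standalone restatement of the Intel-specific checks (constants redeclared locally; PAGE_SHIFT assumed to be 12 as on x86):

#include <stdint.h>
#include <stdio.h>

#define MISC_ADDR_LSB(m)	((m) & 0x3f)		/* bits 5:0 */
#define MISC_ADDR_MODE(m)	(((m) >> 6) & 0x7)	/* bits 8:6 */
#define ADDR_MODE_PHYS		2
#define PAGE_SHIFT		12

static int misc_addr_usable(uint64_t misc)
{
	if (MISC_ADDR_LSB(misc) > PAGE_SHIFT)
		return 0;	/* address coarser than a page */
	if (MISC_ADDR_MODE(misc) != ADDR_MODE_PHYS)
		return 0;	/* not a physical address */
	return 1;
}

int main(void)
{
	/* 0x86: mode 2, LSB 6 -> usable; 0x40: mode 1 -> rejected */
	printf("%d %d\n", misc_addr_usable(0x86), misc_addr_usable(0x40));
	return 0;
}
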
static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
@@ -484,7 +505,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
if (!mce)
return NOTIFY_DONE;
- if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) {
+ if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
pfn = mce->addr >> PAGE_SHIFT;
memory_failure(pfn, MCE_VECTOR, 0);
}
@@ -522,10 +543,10 @@ static bool memory_error(struct mce *m)
struct cpuinfo_x86 *c = &boot_cpu_data;
if (c->x86_vendor == X86_VENDOR_AMD) {
- /*
- * coming soon
- */
- return false;
+ /* ErrCodeExt[20:16] */
+ u8 xec = (m->status >> 16) & 0x1f;
+
+ return (xec == 0x0 || xec == 0x8);
} else if (c->x86_vendor == X86_VENDOR_INTEL) {
/*
* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
@@ -567,7 +588,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
*/
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
- bool error_logged = false;
+ bool error_seen = false;
struct mce m;
int severity;
int i;
@@ -601,6 +622,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
(m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC)))
continue;
+ error_seen = true;
+
mce_read_aux(&m, i);
if (!(flags & MCP_TIMESTAMP))
@@ -608,27 +631,24 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
- /*
- * In the cases where we don't have a valid address after all,
- * do not add it into the ring buffer.
- */
- if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
- if (m.status & MCI_STATUS_ADDRV) {
+ if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m))
+ if (m.status & MCI_STATUS_ADDRV)
m.severity = severity;
- m.usable_addr = mce_usable_address(&m);
-
- if (!mce_gen_pool_add(&m))
- mce_schedule_work();
- }
- }
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
*/
- if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) {
- error_logged = true;
+ if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
mce_log(&m);
+ else if (mce_usable_address(&m)) {
+ /*
+ * Although we skipped logging this, we still want
+ * to take action. Add to the pool so the registered
+ * notifiers will see it.
+ */
+ if (!mce_gen_pool_add(&m))
+ mce_schedule_work();
}
/*
@@ -644,7 +664,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
sync_core();
- return error_logged;
+ return error_seen;
}
EXPORT_SYMBOL_GPL(machine_check_poll);
@@ -931,23 +951,6 @@ reset:
return ret;
}
-/*
- * Check if the address reported by the CPU is in a format we can parse.
- * It would be possible to add code for most other cases, but all would
- * be somewhat complicated (e.g. segment offset would require an instruction
- * parser). So only support physical addresses up to page granuality for now.
- */
-static int mce_usable_address(struct mce *m)
-{
- if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
- return 0;
- if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
- return 0;
- if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
- return 0;
- return 1;
-}
-
static void mce_clear_state(unsigned long *toclear)
{
int i;
@@ -999,6 +1002,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
int flags = MF_ACTION_REQUIRED;
int lmce = 0;
+ /* If this CPU is offline, just bail out. */
+ if (cpu_is_offline(smp_processor_id())) {
+ u64 mcgstatus;
+
+ mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+ if (mcgstatus & MCG_STATUS_RIPV) {
+ mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+ return;
+ }
+ }
+
ist_enter(regs);
this_cpu_inc(mce_exception_count);
@@ -1089,7 +1103,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
/* assuming valid severity level != 0 */
m.severity = severity;
- m.usable_addr = mce_usable_address(&m);
mce_log(&m);
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 7fc27f1cca58..faec7120c508 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -129,8 +129,8 @@ void __init load_ucode_bsp(void)
if (!have_cpuid_p())
return;
- vendor = x86_vendor();
- family = x86_family();
+ vendor = x86_cpuid_vendor();
+ family = x86_cpuid_family();
switch (vendor) {
case X86_VENDOR_INTEL:
@@ -165,8 +165,8 @@ void load_ucode_ap(void)
if (!have_cpuid_p())
return;
- vendor = x86_vendor();
- family = x86_family();
+ vendor = x86_cpuid_vendor();
+ family = x86_cpuid_family();
switch (vendor) {
case X86_VENDOR_INTEL:
@@ -206,8 +206,8 @@ void reload_early_microcode(void)
{
int vendor, family;
- vendor = x86_vendor();
- family = x86_family();
+ vendor = x86_cpuid_vendor();
+ family = x86_cpuid_family();
switch (vendor) {
case X86_VENDOR_INTEL:
@@ -698,3 +698,4 @@ int __init microcode_init(void)
return error;
}
+late_initcall(microcode_init);
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index ce47402eb2f9..ee81c544ee0d 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -145,10 +145,10 @@ matching_model_microcode(struct microcode_header_intel *mc_header,
int ext_sigcount, i;
struct extended_signature *ext_sig;
- fam = __x86_family(sig);
+ fam = x86_family(sig);
model = x86_model(sig);
- fam_ucode = __x86_family(mc_header->sig);
+ fam_ucode = x86_family(mc_header->sig);
model_ucode = x86_model(mc_header->sig);
if (fam == fam_ucode && model == model_ucode)
@@ -163,7 +163,7 @@ matching_model_microcode(struct microcode_header_intel *mc_header,
ext_sigcount = ext_header->count;
for (i = 0; i < ext_sigcount; i++) {
- fam_ucode = __x86_family(ext_sig->sig);
+ fam_ucode = x86_family(ext_sig->sig);
model_ucode = x86_model(ext_sig->sig);
if (fam == fam_ucode && model == model_ucode)
@@ -365,7 +365,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
native_cpuid(&eax, &ebx, &ecx, &edx);
csig.sig = eax;
- family = __x86_family(csig.sig);
+ family = x86_family(csig.sig);
model = x86_model(csig.sig);
if ((model >= 5) || (family > 6)) {
@@ -521,16 +521,12 @@ static bool __init load_builtin_intel_microcode(struct cpio_data *cp)
{
#ifdef CONFIG_X86_64
unsigned int eax = 0x00000001, ebx, ecx = 0, edx;
- unsigned int family, model, stepping;
char name[30];
native_cpuid(&eax, &ebx, &ecx, &edx);
- family = __x86_family(eax);
- model = x86_model(eax);
- stepping = eax & 0xf;
-
- sprintf(name, "intel-ucode/%02x-%02x-%02x", family, model, stepping);
+ sprintf(name, "intel-ucode/%02x-%02x-%02x",
+ x86_family(eax), x86_model(eax), x86_stepping(eax));
return get_builtin_firmware(cp, name);
#else
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 70d7c93f4550..0d98503c2245 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -593,9 +593,16 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
unsigned long x_remove_base,
unsigned long x_remove_size, int i)
{
- static struct range range_new[RANGE_NUM];
+ /*
+ * range_new should really be an automatic variable, but
+ * putting 4096 bytes on the stack is frowned upon, to put it
+ * mildly. It is safe to make it a static __initdata variable,
+ * since mtrr_calc_range_state is only called during init and
+ * there's no way it will call itself recursively.
+ */
+ static struct range range_new[RANGE_NUM] __initdata;
unsigned long range_sums_new;
- static int nr_range_new;
+ int nr_range_new;
int num_reg;
/* Convert ranges to var ranges state: */
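
Generically, the comment above describes a common kernel idiom: a large scratch buffer needed only from __init context goes into static __initdata storage, avoiding a multi-KiB stack frame while still being reclaimed after boot. A minimal sketch under the same assumptions (single-threaded, non-recursive init-time caller; names hypothetical, and the __initdata attribute is stubbed out so the snippet compiles standalone):

#include <stdint.h>
#include <string.h>

#define __initdata	/* kernel section attribute, no-op here */
#define RANGE_NUM	256

struct range { uint64_t start, end; };

/* 4 KiB of scratch: static so it never touches the stack, __initdata so
 * (in the kernel) it is discarded once init completes. */
static struct range range_new[RANGE_NUM] __initdata;

static int calc_range_state(void)
{
	/* No locking needed: called once, single-threaded, during init. */
	memset(range_new, 0, sizeof(range_new));
	return 0;
}

int main(void)
{
	return calc_range_state();
}
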
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 3b533cf37c74..c870af161008 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -349,7 +349,7 @@ static void get_fixed_ranges(mtrr_type *frs)
void mtrr_save_fixed_ranges(void *info)
{
- if (cpu_has_mtrr)
+ if (boot_cpu_has(X86_FEATURE_MTRR))
get_fixed_ranges(mtrr_state.fixed_ranges);
}
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index f891b4750f04..5c3d149ee91c 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -682,7 +682,7 @@ void __init mtrr_bp_init(void)
phys_addr = 32;
- if (cpu_has_mtrr) {
+ if (boot_cpu_has(X86_FEATURE_MTRR)) {
mtrr_if = &generic_mtrr_ops;
size_or_mask = SIZE_OR_MASK_BITS(36);
size_and_mask = 0x00f00000;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4562cf070c27..1b443db2db50 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -5,7 +5,7 @@
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2009 Jaswinder Singh Rajput
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
* Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
* Copyright (C) 2009 Google, Inc., Stephane Eranian
*
@@ -482,6 +482,9 @@ int x86_pmu_hw_config(struct perf_event *event)
/* Support for IP fixup */
if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
precise++;
+
+ if (x86_pmu.pebs_prec_dist)
+ precise++;
}
if (event->attr.precise_ip > precise)
@@ -1531,6 +1534,7 @@ static void __init filter_events(struct attribute **attrs)
{
struct device_attribute *d;
struct perf_pmu_events_attr *pmu_attr;
+ int offset = 0;
int i, j;
for (i = 0; attrs[i]; i++) {
@@ -1539,7 +1543,7 @@ static void __init filter_events(struct attribute **attrs)
/* str trumps id */
if (pmu_attr->event_str)
continue;
- if (x86_pmu.event_map(i))
+ if (x86_pmu.event_map(i + offset))
continue;
for (j = i; attrs[j]; j++)
@@ -1547,6 +1551,14 @@ static void __init filter_events(struct attribute **attrs)
/* Check the shifted attr. */
i--;
+
+ /*
+	 * event_map() is index based; the attrs array is organized
+	 * by increasing event index. If we shift the events, then
+	 * we need to compensate for the event_map(), otherwise
+	 * we are looking up the wrong event in the map.
+ */
+ offset++;
}
}
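
The offset bookkeeping added above can be modelled in isolation: the array is ordered by event index, removing an entry shifts the tail left, and every removal makes the array position lag the event index by one more. A toy version (values and the event_map() predicate are hypothetical):

#include <stdio.h>

/* Pretend events 2 and 4 have no hardware mapping. */
static int event_map(int idx)
{
	return idx != 2 && idx != 4;
}

int main(void)
{
	int attrs[] = { 10, 11, 12, 13, 14, 15 };	/* ordered by event index */
	int n = 6, i, j, offset = 0;

	for (i = 0; i < n; i++) {
		if (event_map(i + offset))	/* array pos + offset == event index */
			continue;
		for (j = i; j < n - 1; j++)	/* shift the tail left */
			attrs[j] = attrs[j + 1];
		n--;
		i--;		/* re-check the entry shifted into slot i */
		offset++;	/* one more removal to compensate for */
	}
	for (i = 0; i < n; i++)
		printf("%d ", attrs[i]);	/* prints: 10 11 13 15 */
	printf("\n");
	return 0;
}
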
@@ -2250,12 +2262,19 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
ss_base = get_segment_base(regs->ss);
fp = compat_ptr(ss_base + regs->bp);
+ pagefault_disable();
while (entry->nr < PERF_MAX_STACK_DEPTH) {
unsigned long bytes;
frame.next_frame = 0;
frame.return_address = 0;
- bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
+ if (!access_ok(VERIFY_READ, fp, 8))
+ break;
+
+ bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
+ if (bytes != 0)
+ break;
+ bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4);
if (bytes != 0)
break;
@@ -2265,6 +2284,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
perf_callchain_store(entry, cs_base + frame.return_address);
fp = compat_ptr(ss_base + frame.next_frame);
}
+ pagefault_enable();
return 1;
}
#else
@@ -2302,12 +2322,19 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
if (perf_callchain_user32(regs, entry))
return;
+ pagefault_disable();
while (entry->nr < PERF_MAX_STACK_DEPTH) {
unsigned long bytes;
frame.next_frame = NULL;
frame.return_address = 0;
- bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
+ if (!access_ok(VERIFY_READ, fp, 16))
+ break;
+
+ bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8);
+ if (bytes != 0)
+ break;
+ bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8);
if (bytes != 0)
break;
@@ -2315,8 +2342,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
break;
perf_callchain_store(entry, frame.return_address);
- fp = frame.next_frame;
+ fp = (void __user *)frame.next_frame;
}
+ pagefault_enable();
}
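
Both walkers above follow the same frame-pointer chain: each frame stores a pointer to the caller's frame plus a return address, now copied field-by-field under pagefault_disable() so an NMI-context fault cannot sleep. Stripped of the user-memory machinery, the traversal is a bounded linked-list walk; a sketch where synthetic frames stand in for a real user stack:

#include <stdint.h>
#include <stdio.h>

struct stack_frame {
	struct stack_frame *next_frame;	/* saved frame pointer */
	uint64_t return_address;
};

#define MAX_STACK_DEPTH 8

static void walk(const struct stack_frame *fp)
{
	int depth = 0;

	while (fp && depth < MAX_STACK_DEPTH) {
		printf("frame %d: ret=%#llx\n", depth,
		       (unsigned long long)fp->return_address);
		fp = fp->next_frame;
		depth++;
	}
}

int main(void)
{
	struct stack_frame f2 = { NULL, 0x3000 };
	struct stack_frame f1 = { &f2, 0x2000 };
	struct stack_frame f0 = { &f1, 0x1000 };

	walk(&f0);
	return 0;
}
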
/*
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 499f533dd3cc..7bb61e32fb29 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -5,7 +5,7 @@
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2009 Jaswinder Singh Rajput
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
* Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
* Copyright (C) 2009 Google, Inc., Stephane Eranian
*
@@ -14,17 +14,7 @@
#include <linux/perf_event.h>
-#if 0
-#undef wrmsrl
-#define wrmsrl(msr, val) \
-do { \
- unsigned int _msr = (msr); \
- u64 _val = (val); \
- trace_printk("wrmsrl(%x, %Lx)\n", (unsigned int)(_msr), \
- (unsigned long long)(_val)); \
- native_write_msr((_msr), (u32)(_val), (u32)(_val >> 32)); \
-} while (0)
-#endif
+/* To enable MSR tracing please use the generic trace points. */
/*
* | NHM/WSM | SNB |
@@ -318,6 +308,10 @@ struct cpu_hw_events {
#define INTEL_UEVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+/* Constraint on specific umask bit only + event */
+#define INTEL_UBIT_EVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c))
+
/* Like UEVENT_CONSTRAINT, but match flags too */
#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
@@ -387,7 +381,7 @@ struct cpu_hw_events {
/* Check flags and event code/umask, and set the HSW N/A flag */
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
__EVENT_CONSTRAINT(code, n, \
- INTEL_ARCH_EVENT_MASK|INTEL_ARCH_EVENT_MASK, \
+ INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)
@@ -589,7 +583,8 @@ struct x86_pmu {
bts_active :1,
pebs :1,
pebs_active :1,
- pebs_broken :1;
+ pebs_broken :1,
+ pebs_prec_dist :1;
int pebs_record_size;
void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints;
@@ -627,6 +622,7 @@ struct x86_perf_task_context {
u64 lbr_from[MAX_LBR_ENTRIES];
u64 lbr_to[MAX_LBR_ENTRIES];
u64 lbr_info[MAX_LBR_ENTRIES];
+ int tos;
int lbr_callstack_users;
int lbr_stack_state;
};
@@ -906,6 +902,8 @@ void intel_pmu_lbr_init_hsw(void);
void intel_pmu_lbr_init_skl(void);
+void intel_pmu_lbr_init_knl(void);
+
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 1cee5d2d7ece..58610539b048 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -18,7 +18,7 @@ static __initconst const u64 amd_hw_cache_event_ids
[ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */
},
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
+ [ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
@@ -160,7 +160,7 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel)
if (offset)
return offset;
- if (!cpu_has_perfctr_core)
+ if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
offset = index;
else
offset = index << 1;
@@ -652,7 +652,7 @@ static __initconst const struct x86_pmu amd_pmu = {
static int __init amd_core_pmu_init(void)
{
- if (!cpu_has_perfctr_core)
+ if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
return 0;
switch (boot_cpu_data.x86) {
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
index cc6cedb8f25d..49742746a6c9 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -523,10 +523,10 @@ static int __init amd_uncore_init(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
goto fail_nodev;
- if (!cpu_has_topoext)
+ if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
goto fail_nodev;
- if (cpu_has_perfctr_nb) {
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
amd_uncore_nb = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_nb) {
ret = -ENOMEM;
@@ -540,7 +540,7 @@ static int __init amd_uncore_init(void)
ret = 0;
}
- if (cpu_has_perfctr_l2) {
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_l2) {
ret = -ENOMEM;
@@ -583,10 +583,11 @@ fail_online:
/* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
amd_uncore_nb = amd_uncore_l2 = NULL;
- if (cpu_has_perfctr_l2)
+
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_L2))
perf_pmu_unregister(&amd_l2_pmu);
fail_l2:
- if (cpu_has_perfctr_nb)
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
perf_pmu_unregister(&amd_nb_pmu);
if (amd_uncore_l2)
free_percpu(amd_uncore_l2);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index f63360be2238..a667078a5180 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -185,6 +185,14 @@ struct event_constraint intel_skl_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
+ INTEL_UEVENT_EXTRA_REG(0x01b7,
+ MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7,
+ MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
+ EVENT_EXTRA_END
+};
+
static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
@@ -232,7 +240,7 @@ static struct event_constraint intel_hsw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
- INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */
+ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
/* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
@@ -255,7 +263,7 @@ struct event_constraint intel_bdw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
- INTEL_UEVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
+ INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
EVENT_CONSTRAINT_END
};
@@ -1457,6 +1465,42 @@ static __initconst const u64 slm_hw_cache_event_ids
},
};
+#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
+#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
+#define KNL_MCDRAM_LOCAL BIT_ULL(21)
+#define KNL_MCDRAM_FAR BIT_ULL(22)
+#define KNL_DDR_LOCAL BIT_ULL(23)
+#define KNL_DDR_FAR BIT_ULL(24)
+#define KNL_DRAM_ANY (KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \
+ KNL_DDR_LOCAL | KNL_DDR_FAR)
+#define KNL_L2_READ SLM_DMND_READ
+#define KNL_L2_WRITE SLM_DMND_WRITE
+#define KNL_L2_PREFETCH SLM_DMND_PREFETCH
+#define KNL_L2_ACCESS SLM_LLC_ACCESS
+#define KNL_L2_MISS (KNL_OT_L2_HITE | KNL_OT_L2_HITF | \
+ KNL_DRAM_ANY | SNB_SNP_ANY | \
+ SNB_NON_DRAM)
+
+static __initconst const u64 knl_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS,
+ [C(RESULT_MISS)] = KNL_L2_WRITE | KNL_L2_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS,
+ [C(RESULT_MISS)] = KNL_L2_PREFETCH | KNL_L2_MISS,
+ },
+ },
+};
+
/*
* Use from PMIs where the LBRs are already disabled.
*/
@@ -2475,6 +2519,44 @@ static void intel_pebs_aliases_snb(struct perf_event *event)
}
}
+static void intel_pebs_aliases_precdist(struct perf_event *event)
+{
+ if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+ /*
+ * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+ * (0x003c) so that we can use it with PEBS.
+ *
+ * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+ * PEBS capable. However we can use INST_RETIRED.PREC_DIST
+ * (0x01c0), which is a PEBS capable event, to get the same
+ * count.
+ *
+ * The PREC_DIST event has special support to minimize sample
+ * shadowing effects. One drawback is that it can be
+ * only programmed on counter 1, but that seems like an
+ * acceptable trade off.
+ */
+ u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16);
+
+ alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+ event->hw.config = alt_config;
+ }
+}
+
+static void intel_pebs_aliases_ivb(struct perf_event *event)
+{
+ if (event->attr.precise_ip < 3)
+ return intel_pebs_aliases_snb(event);
+ return intel_pebs_aliases_precdist(event);
+}
+
+static void intel_pebs_aliases_skl(struct perf_event *event)
+{
+ if (event->attr.precise_ip < 3)
+ return intel_pebs_aliases_core2(event);
+ return intel_pebs_aliases_precdist(event);
+}
+
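
The rewrite performed by intel_pebs_aliases_precdist() is pure bit manipulation: swap the event/umask/inv/cmask portion of the config for the PREC_DIST encoding while preserving everything the user set outside the raw-event mask. A standalone sketch with the PERFEVTSEL layout restated locally (event 7:0, umask 15:8, edge 18, inv 23, cmask 31:24; macro names illustrative); note the result, 0x108001c0 plus any extra bits, matches the cycles:ppp constraint encoding added elsewhere in this series:

#include <stdint.h>
#include <stdio.h>

#define EVT(e, u, inv, cmask)						\
	((uint64_t)(e) | ((uint64_t)(u) << 8) |				\
	 ((uint64_t)(inv) << 23) | ((uint64_t)(cmask) << 24))
#define RAW_EVENT_MASK	0xff84ffffULL	/* event|umask|edge|inv|cmask */

int main(void)
{
	/* CPU_CLK_UNHALTED.THREAD_P, plus a USR bit outside the event mask */
	uint64_t config = EVT(0x3c, 0, 0, 0) | (1ULL << 16);

	if ((config & RAW_EVENT_MASK) == 0x003c) {
		/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 -> 0x108001c0 */
		uint64_t alt = EVT(0xc0, 0x01, 1, 16);

		alt |= config & ~RAW_EVENT_MASK;	/* keep non-event bits */
		printf("alt config = %#llx\n", (unsigned long long)alt);
	}
	return 0;
}
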
static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
{
unsigned long flags = x86_pmu.free_running_flags;
@@ -3332,6 +3414,7 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_gen_event_constraints;
x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
pr_cont("Atom events, ");
break;
@@ -3431,7 +3514,8 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_ivb_event_constraints;
x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
- x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
+ x86_pmu.pebs_prec_dist = true;
if (boot_cpu_data.x86_model == 62)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
@@ -3464,7 +3548,8 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_hsw_event_constraints;
x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
x86_pmu.extra_regs = intel_snbep_extra_regs;
- x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
+ x86_pmu.pebs_prec_dist = true;
/* all extra regs are per-cpu when HT is on */
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
@@ -3499,7 +3584,8 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_bdw_event_constraints;
x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
x86_pmu.extra_regs = intel_snbep_extra_regs;
- x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
+ x86_pmu.pebs_prec_dist = true;
/* all extra regs are per-cpu when HT is on */
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
@@ -3511,6 +3597,24 @@ __init int intel_pmu_init(void)
pr_cont("Broadwell events, ");
break;
+ case 87: /* Knights Landing Xeon Phi */
+ memcpy(hw_cache_event_ids,
+ slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs,
+ knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+ intel_pmu_lbr_init_knl();
+
+ x86_pmu.event_constraints = intel_slm_event_constraints;
+ x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_knl_extra_regs;
+
+ /* all extra regs are per-cpu when HT is on */
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+ pr_cont("Knights Landing events, ");
+ break;
+
case 78: /* 14nm Skylake Mobile */
case 94: /* 14nm Skylake Desktop */
x86_pmu.late_ack = true;
@@ -3521,7 +3625,8 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_skl_event_constraints;
x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
x86_pmu.extra_regs = intel_skl_extra_regs;
- x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_skl;
+ x86_pmu.pebs_prec_dist = true;
/* all extra regs are per-cpu when HT is on */
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 377e8f8ed391..a316ca96f1b6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -298,7 +298,7 @@ static bool __match_event(struct perf_event *a, struct perf_event *b)
static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
{
if (event->attach_state & PERF_ATTACH_TASK)
- return perf_cgroup_from_task(event->hw.target);
+ return perf_cgroup_from_task(event->hw.target, event->ctx);
return event->cgrp;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 5db1c7755548..10602f0a438f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -620,6 +620,8 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
EVENT_CONSTRAINT_END
};
@@ -686,6 +688,8 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -700,6 +704,8 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
@@ -718,9 +724,10 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
struct event_constraint intel_skl_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
- /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
@@ -1101,6 +1108,13 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
void *at;
u64 pebs_status;
+ /*
+ * fmt0 does not have a status bitfield (does not use
+ * perf_record_nhm format)
+ */
+ if (x86_pmu.intel_cap.pebs_format < 1)
+ return base;
+
if (base == NULL)
return NULL;
@@ -1186,7 +1200,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
if (!event->attr.precise_ip)
return;
- n = (top - at) / x86_pmu.pebs_record_size;
+ n = top - at;
if (n <= 0)
return;
@@ -1230,12 +1244,21 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
pebs_status = p->status & cpuc->pebs_enabled;
pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
+ /*
+ * On some CPUs the PEBS status can be zero when PEBS is
+ * racing with clearing of GLOBAL_STATUS.
+ *
+ * Normally we would drop that record, but in the
+ * case when there is only a single active PEBS event
+ * we can assume it's for that event.
+ */
+ if (!pebs_status && cpuc->pebs_enabled &&
+ !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
+ pebs_status = cpuc->pebs_enabled;
+
bit = find_first_bit((unsigned long *)&pebs_status,
x86_pmu.max_pebs_events);
- if (WARN(bit >= x86_pmu.max_pebs_events,
- "PEBS record without PEBS event! status=%Lx pebs_enabled=%Lx active_mask=%Lx",
- (unsigned long long)p->status, (unsigned long long)cpuc->pebs_enabled,
- *(unsigned long long *)cpuc->active_mask))
+ if (bit >= x86_pmu.max_pebs_events)
continue;
/*
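
The single-active-event fallback above rests on the classic trick that x & (x - 1) clears the lowest set bit, so the test is true exactly when precisely one bit is set. A standalone illustration:

#include <stdint.h>
#include <stdio.h>

static int exactly_one_bit(uint64_t x)
{
	return x && !(x & (x - 1));	/* nonzero, and no second bit set */
}

int main(void)
{
	printf("%d %d %d\n",
	       exactly_one_bit(0x0),	/* 0: no active event */
	       exactly_one_bit(0x8),	/* 1: single event -> safe to assume */
	       exactly_one_bit(0x6));	/* 0: ambiguous, record is skipped */
	return 0;
}
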
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index bfd0b717e944..653f88d25987 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -42,6 +42,13 @@ static enum {
#define LBR_FAR_BIT 8 /* do not capture far branches */
#define LBR_CALL_STACK_BIT 9 /* enable call stack */
+/*
+ * The following bit only exists in Linux; we mask it out before writing
+ * it to the actual MSR. But it helps the perf constraint code understand
+ * that this is a separate configuration.
+ */
+#define LBR_NO_INFO_BIT 63 /* don't read LBR_INFO. */
+
#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
#define LBR_USER (1 << LBR_USER_BIT)
#define LBR_JCC (1 << LBR_JCC_BIT)
@@ -52,6 +59,7 @@ static enum {
#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
#define LBR_FAR (1 << LBR_FAR_BIT)
#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT)
+#define LBR_NO_INFO (1ULL << LBR_NO_INFO_BIT)
#define LBR_PLM (LBR_KERNEL | LBR_USER)
@@ -152,8 +160,8 @@ static void __intel_pmu_lbr_enable(bool pmi)
* did not change.
*/
if (cpuc->lbr_sel)
- lbr_select = cpuc->lbr_sel->config;
- if (!pmi)
+ lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
+ if (!pmi && cpuc->lbr_sel)
wrmsrl(MSR_LBR_SELECT, lbr_select);
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
@@ -239,7 +247,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
}
mask = x86_pmu.lbr_nr - 1;
- tos = intel_pmu_lbr_tos();
+ tos = task_ctx->tos;
for (i = 0; i < tos; i++) {
lbr_idx = (tos - i) & mask;
wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
@@ -247,6 +255,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
+ wrmsrl(x86_pmu.lbr_tos, tos);
task_ctx->lbr_stack_state = LBR_NONE;
}
@@ -270,6 +279,7 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
+ task_ctx->tos = tos;
task_ctx->lbr_stack_state = LBR_VALID;
}
@@ -420,6 +430,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
*/
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
+ bool need_info = false;
unsigned long mask = x86_pmu.lbr_nr - 1;
int lbr_format = x86_pmu.intel_cap.lbr_format;
u64 tos = intel_pmu_lbr_tos();
@@ -427,8 +438,11 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
int out = 0;
int num = x86_pmu.lbr_nr;
- if (cpuc->lbr_sel->config & LBR_CALL_STACK)
- num = tos;
+ if (cpuc->lbr_sel) {
+ need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
+ if (cpuc->lbr_sel->config & LBR_CALL_STACK)
+ num = tos;
+ }
for (i = 0; i < num; i++) {
unsigned long lbr_idx = (tos - i) & mask;
@@ -440,7 +454,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
- if (lbr_format == LBR_FORMAT_INFO) {
+ if (lbr_format == LBR_FORMAT_INFO && need_info) {
u64 info;
rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
@@ -588,6 +602,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
if (v != LBR_IGN)
mask |= v;
}
+
reg = &event->hw.branch_reg;
reg->idx = EXTRA_REG_LBR;
@@ -598,6 +613,11 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
*/
reg->config = mask ^ x86_pmu.lbr_sel_mask;
+ if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
+ (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
+ (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+ reg->config |= LBR_NO_INFO;
+
return 0;
}
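
LBR_NO_INFO follows a standard software-only-bit pattern: the flag travels with the stored config so the constraint logic can tell the variants apart, but lbr_sel_mask strips it before any value reaches MSR_LBR_SELECT. A hedged standalone sketch (the mask width and the MSR stub are illustrative):

#include <stdint.h>
#include <stdio.h>

#define HW_SEL_MASK	0x3ffULL	/* bits the hardware implements */
#define SW_NO_INFO	(1ULL << 63)	/* Linux-only: skip LBR_INFO reads */

static void write_select_msr(uint64_t v)
{
	printf("MSR_LBR_SELECT <- %#llx\n", (unsigned long long)v);
}

int main(void)
{
	uint64_t config = 0x1c4ULL | SW_NO_INFO;

	write_select_msr(config & HW_SEL_MASK);	/* software bit stripped */
	if (config & SW_NO_INFO)
		printf("driver: not reading MSR_LBR_INFO_*\n");
	return 0;
}
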
@@ -1026,3 +1046,17 @@ void __init intel_pmu_lbr_init_atom(void)
*/
pr_cont("8-deep LBR, ");
}
+
+/* Knights Landing */
+void intel_pmu_lbr_init_knl(void)
+{
+ x86_pmu.lbr_nr = 8;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = snb_lbr_sel_map;
+
+ pr_cont("8-deep LBR, ");
+}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index 868e1194337f..c0bbd1033b7c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -27,6 +27,7 @@
#include <asm/perf_event.h>
#include <asm/insn.h>
#include <asm/io.h>
+#include <asm/intel_pt.h>
#include "perf_event.h"
#include "intel_pt.h"
@@ -1122,6 +1123,14 @@ static int pt_event_init(struct perf_event *event)
return 0;
}
+void cpu_emergency_stop_pt(void)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+
+ if (pt->handle.event)
+ pt_event_stop(pt->handle.event, PERF_EF_UPDATE);
+}
+
static __init int pt_init(void)
{
int ret, cpu, prior_warn = 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index ed446bdcbf31..24a351ad628d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -63,7 +63,7 @@
#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
#define NR_RAPL_DOMAINS 0x4
-static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
+static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
"pp0-core",
"package",
"dram",
@@ -109,11 +109,11 @@ static struct kobj_attribute format_attr_##_var = \
#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
-#define RAPL_EVENT_ATTR_STR(_name, v, str) \
-static struct perf_pmu_events_attr event_attr_##v = { \
- .attr = __ATTR(_name, 0444, rapl_sysfs_show, NULL), \
- .id = 0, \
- .event_str = str, \
+#define RAPL_EVENT_ATTR_STR(_name, v, str) \
+static struct perf_pmu_events_attr event_attr_##v = { \
+ .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
+ .id = 0, \
+ .event_str = str, \
};
struct rapl_pmu {
@@ -405,19 +405,6 @@ static struct attribute_group rapl_pmu_attr_group = {
.attrs = rapl_pmu_attrs,
};
-static ssize_t rapl_sysfs_show(struct device *dev,
- struct device_attribute *attr,
- char *page)
-{
- struct perf_pmu_events_attr *pmu_attr = \
- container_of(attr, struct perf_pmu_events_attr, attr);
-
- if (pmu_attr->event_str)
- return sprintf(page, "%s", pmu_attr->event_str);
-
- return 0;
-}
-
RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 61215a69b03d..f97f8075bf04 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -884,6 +884,15 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
* each box has a different function id.
*/
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
+ /* Knights Landing uses a common PCI device ID for multiple instances of
+ * an uncore PMU device type. There is only one entry per device type in
+	 * the knl_uncore_pci_ids table in spite of multiple devices present for
+ * some device types. Hence PCI device idx would be 0 for all devices.
+ * So increment pmu pointer to point to an unused array element.
+ */
+ if (boot_cpu_data.x86_model == 87)
+ while (pmu->func_id >= 0)
+ pmu++;
if (pmu->func_id < 0)
pmu->func_id = pdev->devfn;
else
@@ -966,6 +975,7 @@ static int __init uncore_pci_init(void)
case 63: /* Haswell-EP */
ret = hswep_uncore_pci_init();
break;
+ case 79: /* BDX-EP */
case 86: /* BDX-DE */
ret = bdx_uncore_pci_init();
break;
@@ -982,6 +992,9 @@ static int __init uncore_pci_init(void)
case 61: /* Broadwell */
ret = bdw_uncore_pci_init();
break;
+ case 87: /* Knights Landing */
+ ret = knl_uncore_pci_init();
+ break;
default:
return 0;
}
@@ -1287,9 +1300,13 @@ static int __init uncore_cpu_init(void)
case 63: /* Haswell-EP */
hswep_uncore_cpu_init();
break;
+ case 79: /* BDX-EP */
case 86: /* BDX-DE */
bdx_uncore_cpu_init();
break;
+ case 87: /* Knights Landing */
+ knl_uncore_cpu_init();
+ break;
default:
return 0;
}
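
The Knights Landing probe change above (advancing past already-claimed pmu slots because several identical PCI device IDs resolve to one table entry) boils down to a first-free-slot scan. A toy model with hypothetical structure and values:

#include <stdio.h>

struct pmu_slot { int func_id; };	/* -1 == unclaimed */

int main(void)
{
	struct pmu_slot pmus[4] = { {-1}, {-1}, {-1}, {-1} };
	int devfns[3] = { 0x10, 0x11, 0x12 };	/* three identical devices */
	int d;

	for (d = 0; d < 3; d++) {
		struct pmu_slot *pmu = &pmus[0];

		while (pmu->func_id >= 0)	/* skip claimed slots */
			pmu++;
		pmu->func_id = devfns[d];
		printf("devfn %#x -> slot %d\n", devfns[d], (int)(pmu - pmus));
	}
	return 0;
}
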
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 2f0a4a98e16b..07aa2d6bd710 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -338,6 +338,7 @@ int hsw_uncore_pci_init(void);
int bdw_uncore_pci_init(void);
void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
+int snb_pci2phy_map_init(int devid);
/* perf_event_intel_uncore_snbep.c */
int snbep_uncore_pci_init(void);
@@ -348,6 +349,8 @@ int hswep_uncore_pci_init(void);
void hswep_uncore_cpu_init(void);
int bdx_uncore_pci_init(void);
void bdx_uncore_cpu_init(void);
+int knl_uncore_pci_init(void);
+void knl_uncore_cpu_init(void);
/* perf_event_intel_uncore_nhmex.c */
void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index 845256158a10..0b934820fafd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -417,7 +417,7 @@ static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
}
}
-static int snb_pci2phy_map_init(int devid)
+int snb_pci2phy_map_init(int devid)
{
struct pci_dev *dev = NULL;
struct pci2phy_map *map;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index f0f4fcba252e..33acb884ccf1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -209,31 +209,98 @@
#define HSWEP_PCU_MSR_PMON_BOX_CTL 0x710
#define HSWEP_PCU_MSR_PMON_BOX_FILTER 0x715
+/* KNL Ubox */
+#define KNL_U_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_U_MSR_PMON_RAW_EVENT_MASK | \
+ SNBEP_CBO_PMON_CTL_TID_EN)
+/* KNL CHA */
+#define KNL_CHA_MSR_OFFSET 0xc
+#define KNL_CHA_MSR_PMON_CTL_QOR (1 << 16)
+#define KNL_CHA_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK | \
+ KNL_CHA_MSR_PMON_CTL_QOR)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_TID 0x1ff
+#define KNL_CHA_MSR_PMON_BOX_FILTER_STATE (7 << 18)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_OP (0xfffffe2aULL << 32)
+
+/* KNL EDC/MC UCLK */
+#define KNL_UCLK_MSR_PMON_CTR0_LOW 0x400
+#define KNL_UCLK_MSR_PMON_CTL0 0x420
+#define KNL_UCLK_MSR_PMON_BOX_CTL 0x430
+#define KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW 0x44c
+#define KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL 0x454
+#define KNL_PMON_FIXED_CTL_EN 0x1
+
+/* KNL EDC */
+#define KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW 0xa00
+#define KNL_EDC0_ECLK_MSR_PMON_CTL0 0xa20
+#define KNL_EDC0_ECLK_MSR_PMON_BOX_CTL 0xa30
+#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW 0xa3c
+#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL 0xa44
+
+/* KNL MC */
+#define KNL_MC0_CH0_MSR_PMON_CTR0_LOW 0xb00
+#define KNL_MC0_CH0_MSR_PMON_CTL0 0xb20
+#define KNL_MC0_CH0_MSR_PMON_BOX_CTL 0xb30
+#define KNL_MC0_CH0_MSR_PMON_FIXED_LOW 0xb3c
+#define KNL_MC0_CH0_MSR_PMON_FIXED_CTL 0xb44
+
+/* KNL IRP */
+#define KNL_IRP_PCI_PMON_BOX_CTL 0xf0
+#define KNL_IRP_PCI_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
+ KNL_CHA_MSR_PMON_CTL_QOR)
+/* KNL PCU */
+#define KNL_PCU_PMON_CTL_EV_SEL_MASK 0x0000007f
+#define KNL_PCU_PMON_CTL_USE_OCC_CTR (1 << 7)
+#define KNL_PCU_MSR_PMON_CTL_TRESH_MASK 0x3f000000
+#define KNL_PCU_MSR_PMON_RAW_EVENT_MASK \
+ (KNL_PCU_PMON_CTL_EV_SEL_MASK | \
+ KNL_PCU_PMON_CTL_USE_OCC_CTR | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_CBO_PMON_CTL_TID_EN | \
+ SNBEP_PMON_CTL_EV_SEL_EXT | \
+ SNBEP_PMON_CTL_INVERT | \
+ KNL_PCU_MSR_PMON_CTL_TRESH_MASK | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6");
DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
+DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(thresh6, thresh, "config:24-29");
DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
+DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8");
DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5");
DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8");
DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8");
+DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12");
DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47");
DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22");
DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state4, filter_state, "config1:18-20");
+DEFINE_UNCORE_FORMAT_ATTR(filter_local, filter_local, "config1:33");
+DEFINE_UNCORE_FORMAT_ATTR(filter_all_op, filter_all_op, "config1:35");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nnm, filter_nnm, "config1:37");
DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc3, filter_opc, "config1:41-60");
DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62");
DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61");
DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63");
@@ -315,8 +382,9 @@ static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct pe
static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
+ int box_ctl = uncore_pci_box_ctl(box);
- pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, SNBEP_PMON_BOX_CTL_INT);
+ pci_write_config_dword(pdev, box_ctl, SNBEP_PMON_BOX_CTL_INT);
}
static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
@@ -1728,6 +1796,419 @@ int ivbep_uncore_pci_init(void)
}
/* end of IvyTown uncore support */
+/* KNL uncore support */
+static struct attribute *knl_uncore_ubox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh5.attr,
+ NULL,
+};
+
+static struct attribute_group knl_uncore_ubox_format_group = {
+ .name = "format",
+ .attrs = knl_uncore_ubox_formats_attr,
+};
+
+static struct intel_uncore_type knl_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = HSWEP_U_MSR_PMON_CTR0,
+ .event_ctl = HSWEP_U_MSR_PMON_CTL0,
+ .event_mask = KNL_U_MSR_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
+ .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
+ .ops = &snbep_uncore_msr_ops,
+ .format_group = &knl_uncore_ubox_format_group,
+};
+
+static struct attribute *knl_uncore_cha_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_qor.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid4.attr,
+ &format_attr_filter_link3.attr,
+ &format_attr_filter_state4.attr,
+ &format_attr_filter_local.attr,
+ &format_attr_filter_all_op.attr,
+ &format_attr_filter_nnm.attr,
+ &format_attr_filter_opc3.attr,
+ &format_attr_filter_nc.attr,
+ &format_attr_filter_isoc.attr,
+ NULL,
+};
+
+static struct attribute_group knl_uncore_cha_format_group = {
+ .name = "format",
+ .attrs = knl_uncore_cha_formats_attr,
+};
+
+static struct event_constraint knl_uncore_cha_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x1f, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg knl_uncore_cha_extra_regs[] = {
+ SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+ SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x3d, 0xff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x4),
+ EVENT_EXTRA_END
+};
+
+static u64 knl_cha_filter_mask(int fields)
+{
+ u64 mask = 0;
+
+ if (fields & 0x1)
+ mask |= KNL_CHA_MSR_PMON_BOX_FILTER_TID;
+ if (fields & 0x2)
+ mask |= KNL_CHA_MSR_PMON_BOX_FILTER_STATE;
+ if (fields & 0x4)
+ mask |= KNL_CHA_MSR_PMON_BOX_FILTER_OP;
+ return mask;
+}
+
+static struct event_constraint *
+knl_cha_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ return __snbep_cbox_get_constraint(box, event, knl_cha_filter_mask);
+}
+
+static int knl_cha_hw_config(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct extra_reg *er;
+ int idx = 0;
+
+ for (er = knl_uncore_cha_extra_regs; er->msr; er++) {
+ if (er->event != (event->hw.config & er->config_mask))
+ continue;
+ idx |= er->idx;
+ }
+
+ if (idx) {
+ reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
+ KNL_CHA_MSR_OFFSET * box->pmu->pmu_idx;
+ reg1->config = event->attr.config1 & knl_cha_filter_mask(idx);
+ reg1->idx = idx;
+ }
+ return 0;
+}
+
+static void hswep_cbox_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event);
+
+static struct intel_uncore_ops knl_uncore_cha_ops = {
+ .init_box = snbep_uncore_msr_init_box,
+ .disable_box = snbep_uncore_msr_disable_box,
+ .enable_box = snbep_uncore_msr_enable_box,
+ .disable_event = snbep_uncore_msr_disable_event,
+ .enable_event = hswep_cbox_enable_event,
+ .read_counter = uncore_msr_read_counter,
+ .hw_config = knl_cha_hw_config,
+ .get_constraint = knl_cha_get_constraint,
+ .put_constraint = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type knl_uncore_cha = {
+ .name = "cha",
+ .num_counters = 4,
+ .num_boxes = 38,
+ .perf_ctr_bits = 48,
+ .event_ctl = HSWEP_C0_MSR_PMON_CTL0,
+ .perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
+ .event_mask = KNL_CHA_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL,
+ .msr_offset = KNL_CHA_MSR_OFFSET,
+ .num_shared_regs = 1,
+ .constraints = knl_uncore_cha_constraints,
+ .ops = &knl_uncore_cha_ops,
+ .format_group = &knl_uncore_cha_format_group,
+};
+
+static struct attribute *knl_uncore_pcu_formats_attr[] = {
+ &format_attr_event2.attr,
+ &format_attr_use_occ_ctr.attr,
+ &format_attr_occ_sel.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh6.attr,
+ &format_attr_occ_invert.attr,
+ &format_attr_occ_edge_det.attr,
+ NULL,
+};
+
+static struct attribute_group knl_uncore_pcu_format_group = {
+ .name = "format",
+ .attrs = knl_uncore_pcu_formats_attr,
+};
+
+static struct intel_uncore_type knl_uncore_pcu = {
+ .name = "pcu",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0,
+ .event_ctl = HSWEP_PCU_MSR_PMON_CTL0,
+ .event_mask = KNL_PCU_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL,
+ .ops = &snbep_uncore_msr_ops,
+ .format_group = &knl_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *knl_msr_uncores[] = {
+ &knl_uncore_ubox,
+ &knl_uncore_cha,
+ &knl_uncore_pcu,
+ NULL,
+};
+
+void knl_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = knl_msr_uncores;
+}
+
+static void knl_uncore_imc_enable_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ int box_ctl = uncore_pci_box_ctl(box);
+
+ pci_write_config_dword(pdev, box_ctl, 0);
+}
+
+static void knl_uncore_imc_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ if ((event->attr.config & SNBEP_PMON_CTL_EV_SEL_MASK)
+ == UNCORE_FIXED_EVENT)
+ pci_write_config_dword(pdev, hwc->config_base,
+ hwc->config | KNL_PMON_FIXED_CTL_EN);
+ else
+ pci_write_config_dword(pdev, hwc->config_base,
+ hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops knl_uncore_imc_ops = {
+ .init_box = snbep_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = knl_uncore_imc_enable_box,
+ .read_counter = snbep_uncore_pci_read_counter,
+ .enable_event = knl_uncore_imc_enable_event,
+ .disable_event = snbep_uncore_pci_disable_event,
+};
+
+static struct intel_uncore_type knl_uncore_imc_uclk = {
+ .name = "imc_uclk",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = KNL_UCLK_MSR_PMON_CTR0_LOW,
+ .event_ctl = KNL_UCLK_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
+ .fixed_ctl = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
+ .box_ctl = KNL_UCLK_MSR_PMON_BOX_CTL,
+ .ops = &knl_uncore_imc_ops,
+ .format_group = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type knl_uncore_imc_dclk = {
+ .name = "imc",
+ .num_counters = 4,
+ .num_boxes = 6,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = KNL_MC0_CH0_MSR_PMON_CTR0_LOW,
+ .event_ctl = KNL_MC0_CH0_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = KNL_MC0_CH0_MSR_PMON_FIXED_LOW,
+ .fixed_ctl = KNL_MC0_CH0_MSR_PMON_FIXED_CTL,
+ .box_ctl = KNL_MC0_CH0_MSR_PMON_BOX_CTL,
+ .ops = &knl_uncore_imc_ops,
+ .format_group = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type knl_uncore_edc_uclk = {
+ .name = "edc_uclk",
+ .num_counters = 4,
+ .num_boxes = 8,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = KNL_UCLK_MSR_PMON_CTR0_LOW,
+ .event_ctl = KNL_UCLK_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
+ .fixed_ctl = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
+ .box_ctl = KNL_UCLK_MSR_PMON_BOX_CTL,
+ .ops = &knl_uncore_imc_ops,
+ .format_group = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type knl_uncore_edc_eclk = {
+ .name = "edc_eclk",
+ .num_counters = 4,
+ .num_boxes = 8,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW,
+ .event_ctl = KNL_EDC0_ECLK_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW,
+ .fixed_ctl = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL,
+ .box_ctl = KNL_EDC0_ECLK_MSR_PMON_BOX_CTL,
+ .ops = &knl_uncore_imc_ops,
+ .format_group = &snbep_uncore_format_group,
+};
+
+static struct event_constraint knl_uncore_m2pcie_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type knl_uncore_m2pcie = {
+ .name = "m2pcie",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .constraints = knl_uncore_m2pcie_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct attribute *knl_uncore_irp_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_qor.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute_group knl_uncore_irp_format_group = {
+ .name = "format",
+ .attrs = knl_uncore_irp_formats_attr,
+};
+
+static struct intel_uncore_type knl_uncore_irp = {
+ .name = "irp",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = KNL_IRP_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = KNL_IRP_PCI_PMON_BOX_CTL,
+ .ops = &snbep_uncore_pci_ops,
+ .format_group = &knl_uncore_irp_format_group,
+};
+
+enum {
+ KNL_PCI_UNCORE_MC_UCLK,
+ KNL_PCI_UNCORE_MC_DCLK,
+ KNL_PCI_UNCORE_EDC_UCLK,
+ KNL_PCI_UNCORE_EDC_ECLK,
+ KNL_PCI_UNCORE_M2PCIE,
+ KNL_PCI_UNCORE_IRP,
+};
+
+static struct intel_uncore_type *knl_pci_uncores[] = {
+ [KNL_PCI_UNCORE_MC_UCLK] = &knl_uncore_imc_uclk,
+ [KNL_PCI_UNCORE_MC_DCLK] = &knl_uncore_imc_dclk,
+ [KNL_PCI_UNCORE_EDC_UCLK] = &knl_uncore_edc_uclk,
+ [KNL_PCI_UNCORE_EDC_ECLK] = &knl_uncore_edc_eclk,
+ [KNL_PCI_UNCORE_M2PCIE] = &knl_uncore_m2pcie,
+ [KNL_PCI_UNCORE_IRP] = &knl_uncore_irp,
+ NULL,
+};
+
+/*
+ * KNL uses a common PCI device ID for multiple instances of an Uncore PMU
+ * device type. Prior to KNL, each instance of a PMU device type had a unique
+ * device ID.
+ *
+ * PCI Device ID Uncore PMU Devices
+ * ----------------------------------
+ * 0x7841 MC0 UClk, MC1 UClk
+ * 0x7843 MC0 DClk CH 0, MC0 DClk CH 1, MC0 DClk CH 2,
+ * MC1 DClk CH 0, MC1 DClk CH 1, MC1 DClk CH 2
+ * 0x7833 EDC0 UClk, EDC1 UClk, EDC2 UClk, EDC3 UClk,
+ * EDC4 UClk, EDC5 UClk, EDC6 UClk, EDC7 UClk
+ * 0x7835 EDC0 EClk, EDC1 EClk, EDC2 EClk, EDC3 EClk,
+ * EDC4 EClk, EDC5 EClk, EDC6 EClk, EDC7 EClk
+ * 0x7817 M2PCIe
+ * 0x7814 IRP
+ */
+
+static const struct pci_device_id knl_uncore_pci_ids[] = {
+ { /* MC UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0),
+ },
+ { /* MC DClk Channel */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0),
+ },
+ { /* EDC UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0),
+ },
+ { /* EDC EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0),
+ },
+ { /* M2PCIe */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_M2PCIE, 0),
+ },
+ { /* IRP */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7814),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_IRP, 0),
+ },
+ { /* end: all zeroes */ }
+};
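/*
 * Sketch (editor's note, not part of the patch): because KNL reuses one
 * device ID per PMU type, .driver_data must carry both the type and the
 * instance index. Assuming the usual (type << 8 | idx) packing of
 * UNCORE_PCI_DEV_DATA -- verify against perf_event_intel_uncore.h:
 */
#define EX_PCI_DEV_DATA(type, idx)	((type) << 8 | (idx))	/* pack */
#define EX_PCI_DEV_TYPE(data)		(((data) >> 8) & 0xff)	/* unpack type */
#define EX_PCI_DEV_IDX(data)		((data) & 0xff)		/* unpack instance */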
+
+static struct pci_driver knl_uncore_pci_driver = {
+ .name = "knl_uncore",
+ .id_table = knl_uncore_pci_ids,
+};
+
+int knl_uncore_pci_init(void)
+{
+ int ret;
+
+ /* All KNL PCI-based PMON units are on the same PCI bus, except IRP */
+ ret = snb_pci2phy_map_init(0x7814); /* IRP */
+ if (ret)
+ return ret;
+ ret = snb_pci2phy_map_init(0x7817); /* M2PCIe */
+ if (ret)
+ return ret;
+ uncore_pci_uncores = knl_pci_uncores;
+ uncore_pci_driver = &knl_uncore_pci_driver;
+ return 0;
+}
+
+/* end of KNL uncore support */
+
/* Haswell-EP uncore support */
static struct attribute *hswep_uncore_ubox_formats_attr[] = {
&format_attr_event.attr,
@@ -2338,7 +2819,7 @@ int hswep_uncore_pci_init(void)
}
/* end of Haswell-EP uncore support */
-/* BDX-DE uncore support */
+/* BDX uncore support */
static struct intel_uncore_type bdx_uncore_ubox = {
.name = "ubox",
@@ -2360,13 +2841,14 @@ static struct event_constraint bdx_uncore_cbox_constraints[] = {
UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x3e, 0x1),
EVENT_CONSTRAINT_END
};
static struct intel_uncore_type bdx_uncore_cbox = {
.name = "cbox",
.num_counters = 4,
- .num_boxes = 8,
+ .num_boxes = 24,
.perf_ctr_bits = 48,
.event_ctl = HSWEP_C0_MSR_PMON_CTL0,
.perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
@@ -2379,9 +2861,24 @@ static struct intel_uncore_type bdx_uncore_cbox = {
.format_group = &hswep_uncore_cbox_format_group,
};
+static struct intel_uncore_type bdx_uncore_sbox = {
+ .name = "sbox",
+ .num_counters = 4,
+ .num_boxes = 4,
+ .perf_ctr_bits = 48,
+ .event_ctl = HSWEP_S0_MSR_PMON_CTL0,
+ .perf_ctr = HSWEP_S0_MSR_PMON_CTR0,
+ .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL,
+ .msr_offset = HSWEP_SBOX_MSR_OFFSET,
+ .ops = &hswep_uncore_sbox_msr_ops,
+ .format_group = &hswep_uncore_sbox_format_group,
+};
+
static struct intel_uncore_type *bdx_msr_uncores[] = {
&bdx_uncore_ubox,
&bdx_uncore_cbox,
+ &bdx_uncore_sbox,
&hswep_uncore_pcu,
NULL,
};
@@ -2396,7 +2893,7 @@ void bdx_uncore_cpu_init(void)
static struct intel_uncore_type bdx_uncore_ha = {
.name = "ha",
.num_counters = 4,
- .num_boxes = 1,
+ .num_boxes = 2,
.perf_ctr_bits = 48,
SNBEP_UNCORE_PCI_COMMON_INIT(),
};
@@ -2404,7 +2901,7 @@ static struct intel_uncore_type bdx_uncore_ha = {
static struct intel_uncore_type bdx_uncore_imc = {
.name = "imc",
.num_counters = 5,
- .num_boxes = 2,
+ .num_boxes = 8,
.perf_ctr_bits = 48,
.fixed_ctr_bits = 48,
.fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
@@ -2424,6 +2921,19 @@ static struct intel_uncore_type bdx_uncore_irp = {
.format_group = &snbep_uncore_format_group,
};
+static struct intel_uncore_type bdx_uncore_qpi = {
+ .name = "qpi",
+ .num_counters = 4,
+ .num_boxes = 3,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &snbep_uncore_qpi_ops,
+ .format_group = &snbep_uncore_qpi_format_group,
+};
static struct event_constraint bdx_uncore_r2pcie_constraints[] = {
UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
@@ -2432,6 +2942,8 @@ static struct event_constraint bdx_uncore_r2pcie_constraints[] = {
UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
EVENT_CONSTRAINT_END
};
@@ -2445,18 +2957,65 @@ static struct intel_uncore_type bdx_uncore_r2pcie = {
SNBEP_UNCORE_PCI_COMMON_INIT(),
};
+static struct event_constraint bdx_uncore_r3qpi_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x01, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x07, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x08, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x09, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x0a, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x0e, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x15, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x1f, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type bdx_uncore_r3qpi = {
+ .name = "r3qpi",
+ .num_counters = 3,
+ .num_boxes = 3,
+ .perf_ctr_bits = 48,
+ .constraints = bdx_uncore_r3qpi_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
enum {
BDX_PCI_UNCORE_HA,
BDX_PCI_UNCORE_IMC,
BDX_PCI_UNCORE_IRP,
+ BDX_PCI_UNCORE_QPI,
BDX_PCI_UNCORE_R2PCIE,
+ BDX_PCI_UNCORE_R3QPI,
};
static struct intel_uncore_type *bdx_pci_uncores[] = {
[BDX_PCI_UNCORE_HA] = &bdx_uncore_ha,
[BDX_PCI_UNCORE_IMC] = &bdx_uncore_imc,
[BDX_PCI_UNCORE_IRP] = &bdx_uncore_irp,
+ [BDX_PCI_UNCORE_QPI] = &bdx_uncore_qpi,
[BDX_PCI_UNCORE_R2PCIE] = &bdx_uncore_r2pcie,
+ [BDX_PCI_UNCORE_R3QPI] = &bdx_uncore_r3qpi,
NULL,
};
@@ -2465,6 +3024,10 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0),
},
+ { /* Home Agent 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f38),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 1),
+ },
{ /* MC0 Channel 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb0),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 0),
@@ -2473,14 +3036,74 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb1),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 1),
},
+ { /* MC0 Channel 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb4),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 2),
+ },
+ { /* MC0 Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb5),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 3),
+ },
+ { /* MC1 Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd0),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 4),
+ },
+ { /* MC1 Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd1),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 5),
+ },
+ { /* MC1 Channel 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd4),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 6),
+ },
+ { /* MC1 Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd5),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 7),
+ },
{ /* IRP */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f39),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IRP, 0),
},
+ { /* QPI0 Port 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f32),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 0),
+ },
+ { /* QPI0 Port 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f33),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 1),
+ },
+ { /* QPI1 Port 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3a),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 2),
+ },
{ /* R2PCIe */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f34),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R2PCIE, 0),
},
+ { /* R3QPI0 Link 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f36),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 0),
+ },
+ { /* R3QPI0 Link 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f37),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 1),
+ },
+ { /* R3QPI1 Link 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3e),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 2),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0),
+ },
+ { /* QPI Port 1 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1),
+ },
+ { /* QPI Port 2 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2),
+ },
{ /* end: all zeroes */ }
};
@@ -2500,4 +3123,4 @@ int bdx_uncore_pci_init(void)
return 0;
}
-/* end of BDX-DE uncore support */
+/* end of BDX uncore support */
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 136ac74dee82..819d94982e07 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -33,28 +33,27 @@ static int __init x86_rdrand_setup(char *s)
__setup("nordrand", x86_rdrand_setup);
/*
- * Force a reseed cycle; we are architecturally guaranteed a reseed
- * after no more than 512 128-bit chunks of random data. This also
- * acts as a test of the CPU capability.
+ * RDRAND has a Built-In Self-Test (BIST) that runs on every invocation.
+ * Run the instruction a few times as a sanity check; if any invocation
+ * fails, disable RDRAND on this CPU.
*/
-#define RESEED_LOOP ((512*128)/sizeof(unsigned long))
+#define SANITY_CHECK_LOOPS 8
void x86_init_rdrand(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_ARCH_RANDOM
unsigned long tmp;
- int i, count, ok;
+ int i;
if (!cpu_has(c, X86_FEATURE_RDRAND))
- return; /* Nothing to do */
+ return;
- for (count = i = 0; i < RESEED_LOOP; i++) {
- ok = rdrand_long(&tmp);
- if (ok)
- count++;
+ for (i = 0; i < SANITY_CHECK_LOOPS; i++) {
+ if (!rdrand_long(&tmp)) {
+ clear_cpu_cap(c, X86_FEATURE_RDRAND);
+ printk_once(KERN_WARNING "rdrand: disabled\n");
+ return;
+ }
}
-
- if (count != RESEED_LOOP)
- clear_cpu_cap(c, X86_FEATURE_RDRAND);
#endif
}
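/*
 * Sketch (editor's note, not part of the patch): the same sanity check
 * can be reproduced from userspace; RDRAND sets CF on success. This is
 * a hypothetical stand-alone x86-64 example (GCC/Clang inline asm) and
 * assumes the CPU actually advertises RDRAND, else it takes #UD:
 */
#include <stdio.h>

static int rdrand_once(unsigned long long *v)
{
	unsigned char ok;

	/* CF=1 means a valid random value was returned */
	asm volatile("rdrand %0; setc %1" : "=r" (*v), "=qm" (ok));
	return ok;
}

int main(void)
{
	unsigned long long v = 0;
	int i;

	for (i = 0; i < 8; i++)	/* mirrors SANITY_CHECK_LOOPS above */
		if (!rdrand_once(&v))
			return puts("rdrand: sanity check failed"), 1;
	printf("rdrand ok, last value %#llx\n", v);
	return 0;
}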
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 608fb26c7254..8cb57df9398d 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -31,32 +31,12 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
const struct cpuid_bit *cb;
static const struct cpuid_bit cpuid_bits[] = {
- { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 },
- { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 },
- { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 },
- { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 },
- { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 },
- { X86_FEATURE_HWP, CR_EAX, 7, 0x00000006, 0 },
- { X86_FEATURE_HWP_NOTIFY, CR_EAX, 8, 0x00000006, 0 },
- { X86_FEATURE_HWP_ACT_WINDOW, CR_EAX, 9, 0x00000006, 0 },
- { X86_FEATURE_HWP_EPP, CR_EAX,10, 0x00000006, 0 },
- { X86_FEATURE_HWP_PKG_REQ, CR_EAX,11, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 },
{ X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 },
- { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 },
- { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
- { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
- { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 },
- { X86_FEATURE_TSCRATEMSR, CR_EDX, 4, 0x8000000a, 0 },
- { X86_FEATURE_VMCBCLEAN, CR_EDX, 5, 0x8000000a, 0 },
- { X86_FEATURE_FLUSHBYASID, CR_EDX, 6, 0x8000000a, 0 },
- { X86_FEATURE_DECODEASSISTS, CR_EDX, 7, 0x8000000a, 0 },
- { X86_FEATURE_PAUSEFILTER, CR_EDX,10, 0x8000000a, 0 },
- { X86_FEATURE_PFTHRESHOLD, CR_EDX,12, 0x8000000a, 0 },
{ 0, 0, 0, 0, 0 }
};
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 3fa0e5ad86b4..252da7aceca6 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -12,7 +12,7 @@ static void early_init_transmeta(struct cpuinfo_x86 *c)
xlvl = cpuid_eax(0x80860000);
if ((xlvl & 0xffff0000) == 0x80860000) {
if (xlvl >= 0x80860001)
- c->x86_capability[2] = cpuid_edx(0x80860001);
+ c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001);
}
}
@@ -82,7 +82,7 @@ static void init_transmeta(struct cpuinfo_x86 *c)
/* Unhide possibly hidden capability flags */
rdmsr(0x80860004, cap_mask, uk);
wrmsr(0x80860004, ~0, uk);
- c->x86_capability[0] = cpuid_edx(0x00000001);
+ c->x86_capability[CPUID_1_EDX] = cpuid_edx(0x00000001);
wrmsr(0x80860004, cap_mask, uk);
/* All Transmeta CPUs have a constant TSC */
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 2c1910f6717e..58f34319b29a 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -35,6 +35,7 @@
#include <asm/cpu.h>
#include <asm/reboot.h>
#include <asm/virtext.h>
+#include <asm/intel_pt.h>
/* Alignment required for elf header segment */
#define ELF_CORE_HEADER_ALIGN 4096
@@ -125,6 +126,11 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
cpu_emergency_vmxoff();
cpu_emergency_svm_disable();
+ /*
+ * Disable Intel PT to stop its logging
+ */
+ cpu_emergency_stop_pt();
+
disable_local_APIC();
}
@@ -169,6 +175,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
cpu_emergency_vmxoff();
cpu_emergency_svm_disable();
+ /*
+ * Disable Intel PT to stop its logging
+ */
+ cpu_emergency_stop_pt();
+
#ifdef CONFIG_X86_IO_APIC
/* Prevent crash_kexec() from deadlocking on ioapic_lock. */
ioapic_zap_locks();
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index be39b5fde4b9..7b2978ab30df 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -12,7 +12,7 @@
*/
static void fpu__init_cpu_ctx_switch(void)
{
- if (!cpu_has_eager_fpu)
+ if (!boot_cpu_has(X86_FEATURE_EAGER_FPU))
stts();
else
clts();
@@ -143,9 +143,18 @@ static void __init fpu__init_system_generic(void)
unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size);
-/* Enforce that 'MEMBER' is the last field of 'TYPE': */
+/* Get alignment of the TYPE. */
+#define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
+
+/*
+ * Enforce that 'MEMBER' is the last field of 'TYPE'.
+ *
+ * Align the computed size with the alignment of the TYPE,
+ * because that is how C pads structs.
+ */
#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \
- BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER))
+ BUILD_BUG_ON(sizeof(TYPE) != ALIGN(offsetofend(TYPE, MEMBER), \
+ TYPE_ALIGN(TYPE)))
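/*
 * Worked example (editor's note, not part of the patch): tail padding is
 * why plain offsetofend() is not enough. On x86-64:
 *
 *	struct s { long a; char b; };
 *	offsetofend(struct s, b)  ==  9
 *	TYPE_ALIGN(struct s)      ==  8
 *	ALIGN(9, 8)  ==  16  ==  sizeof(struct s)
 *
 * so 'b' really is the last member even though sizeof() != offsetofend().
 */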
/*
* We append the 'struct fpu' to the task_struct:
@@ -188,7 +197,7 @@ static void __init fpu__init_task_struct_size(void)
*/
static void __init fpu__init_system_xstate_size_legacy(void)
{
- static int on_boot_cpu = 1;
+ static int on_boot_cpu __initdata = 1;
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
@@ -278,7 +287,7 @@ __setup("eagerfpu=", eager_fpu_setup);
*/
static void __init fpu__init_system_ctx_switch(void)
{
- static bool on_boot_cpu = 1;
+ static bool on_boot_cpu __initdata = 1;
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
@@ -287,7 +296,7 @@ static void __init fpu__init_system_ctx_switch(void)
current_thread_info()->status = 0;
/* Auto enable eagerfpu for xsaveopt */
- if (cpu_has_xsaveopt && eagerfpu != DISABLE)
+ if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
eagerfpu = ENABLE;
if (xfeatures_mask & XFEATURE_MASK_EAGER) {
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index ef29b742cea7..31c6a60505e6 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -385,20 +385,19 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
*/
void fpu__init_prepare_fx_sw_frame(void)
{
- int fsave_header_size = sizeof(struct fregs_state);
int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
- if (config_enabled(CONFIG_X86_32))
- size += fsave_header_size;
-
fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
fx_sw_reserved.extended_size = size;
fx_sw_reserved.xfeatures = xfeatures_mask;
fx_sw_reserved.xstate_size = xstate_size;
- if (config_enabled(CONFIG_IA32_EMULATION)) {
+ if (config_enabled(CONFIG_IA32_EMULATION) ||
+ config_enabled(CONFIG_X86_32)) {
+ int fsave_header_size = sizeof(struct fregs_state);
+
fx_sw_reserved_ia32 = fx_sw_reserved;
- fx_sw_reserved_ia32.extended_size += fsave_header_size;
+ fx_sw_reserved_ia32.extended_size = size + fsave_header_size;
}
}
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 6454f2731b56..40f100285984 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -297,7 +297,7 @@ static void __init setup_xstate_comp(void)
*/
static void __init setup_init_fpu_buf(void)
{
- static int on_boot_cpu = 1;
+ static int on_boot_cpu __initdata = 1;
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
@@ -608,7 +608,7 @@ static void fpu__init_disable_system_xstate(void)
void __init fpu__init_system_xstate(void)
{
unsigned int eax, ebx, ecx, edx;
- static int on_boot_cpu = 1;
+ static int on_boot_cpu __initdata = 1;
int err;
WARN_ON_FPU(!on_boot_cpu);
@@ -694,7 +694,6 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
if (!boot_cpu_has(X86_FEATURE_XSAVE))
return NULL;
- xsave = &current->thread.fpu.state.xsave;
/*
* We should not ever be requesting features that we
* have not enabled. Remember that pcntxt_mask is
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 50a3fad5b89f..2bcfb5f2bc44 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -300,6 +300,10 @@ static int arch_build_bp_info(struct perf_event *bp)
return -EINVAL;
if (bp->attr.bp_addr & (bp->attr.bp_len - 1))
return -EINVAL;
+
+ if (!boot_cpu_has(X86_FEATURE_BPEXT))
+ return -EOPNOTSUPP;
+
/*
* It's impossible to use a range breakpoint to fake out
* user vs kernel detection because bp_len - 1 can't
@@ -307,8 +311,6 @@ static int arch_build_bp_info(struct perf_event *bp)
* breakpoints, then we'll have to check for kprobe-blacklisted
* addresses anywhere in the range.
*/
- if (!cpu_has_bpext)
- return -EOPNOTSUPP;
info->mask = bp->attr.bp_len - 1;
info->len = X86_BREAKPOINT_LEN_1;
}
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index dc5fa6a1e8d6..3512ba607361 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -1,7 +1,7 @@
/*
* x86 specific code for irq_work
*
- * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
*/
#include <linux/kernel.h>
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 2bd81e302427..72cef58693c7 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -45,6 +45,11 @@ early_param("no-kvmclock", parse_no_kvmclock);
static struct pvclock_vsyscall_time_info *hv_clock;
static struct pvclock_wall_clock wall_clock;
+struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
+{
+ return hv_clock;
+}
+
/*
* The wallclock is the time of day when we booted. Since then, some time may
* have elapsed since the hypervisor wrote the data. So we try to account for
@@ -305,7 +310,6 @@ int __init kvm_setup_vsyscall_timeinfo(void)
{
#ifdef CONFIG_X86_64
int cpu;
- int ret;
u8 flags;
struct pvclock_vcpu_time_info *vcpu_time;
unsigned int size;
@@ -325,11 +329,6 @@ int __init kvm_setup_vsyscall_timeinfo(void)
return 1;
}
- if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
- put_cpu();
- return ret;
- }
-
put_cpu();
kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index 94ea120fa21f..87e1762e2bca 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -278,6 +278,12 @@ trace:
/* save_mcount_regs fills in first two parameters */
save_mcount_regs
+ /*
+ * When DYNAMIC_FTRACE is not defined, ARCH_SUPPORTS_FTRACE_OPS is not
+ * set (see include/asm/ftrace.h and include/linux/ftrace.h). Only the
+ * ip and parent ip are used and the list function is called when
+ * function tracing is enabled.
+ */
call *ftrace_trace_function
restore_mcount_regs
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 697f90db0e37..8a2cdd736fa4 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -29,6 +29,7 @@
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
+#include <asm/reboot.h>
#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>
@@ -231,7 +232,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
#endif
if (panic_on_unrecovered_nmi)
- panic("NMI: Not continuing");
+ nmi_panic(regs, "NMI: Not continuing");
pr_emerg("Dazed and confused, but trying to continue\n");
@@ -255,8 +256,16 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
reason, smp_processor_id());
show_regs(regs);
- if (panic_on_io_nmi)
- panic("NMI IOCK error: Not continuing");
+ if (panic_on_io_nmi) {
+ nmi_panic(regs, "NMI IOCK error: Not continuing");
+
+ /*
+ * If we end up here, it means we have received an NMI while
+ * processing panic(). Simply return without delaying and
+ * re-enabling NMIs.
+ */
+ return;
+ }
/* Re-enable the IOCK line, wait for a few seconds */
reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
@@ -297,7 +306,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
pr_emerg("Do you have a strange power saving mode enabled?\n");
if (unknown_nmi_panic || panic_on_unrecovered_nmi)
- panic("NMI: Not continuing");
+ nmi_panic(regs, "NMI: Not continuing");
pr_emerg("Dazed and confused, but trying to continue\n");
}
@@ -348,8 +357,19 @@ static void default_do_nmi(struct pt_regs *regs)
return;
}
- /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
- raw_spin_lock(&nmi_reason_lock);
+ /*
+ * Non-CPU-specific NMI: NMI sources can be processed on any CPU.
+ *
+ * Another CPU may be processing panic routines while holding
+ * nmi_reason_lock. Check whether that CPU issued the crash-dump IPI,
+ * and if so, call its callback directly. If no CPU is preparing a
+ * crash dump, we simply loop here.
+ */
+ while (!raw_spin_trylock(&nmi_reason_lock)) {
+ run_crash_ipi_callback(regs);
+ cpu_relax();
+ }
+
reason = x86_platform.get_nmi_reason();
if (reason & NMI_REASON_MASK) {
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c2130aef3f9d..f08ac28b8136 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -74,16 +74,6 @@ void __init default_banner(void)
/* Undefined instruction for dealing with missing ops pointers. */
static const unsigned char ud2a[] = { 0x0f, 0x0b };
-unsigned paravirt_patch_nop(void)
-{
- return 0;
-}
-
-unsigned paravirt_patch_ignore(unsigned len)
-{
- return len;
-}
-
struct branch {
unsigned char opcode;
u32 delta;
@@ -133,7 +123,6 @@ static void *get_call_destination(u8 type)
.pv_time_ops = pv_time_ops,
.pv_cpu_ops = pv_cpu_ops,
.pv_irq_ops = pv_irq_ops,
- .pv_apic_ops = pv_apic_ops,
.pv_mmu_ops = pv_mmu_ops,
#ifdef CONFIG_PARAVIRT_SPINLOCKS
.pv_lock_ops = pv_lock_ops,
@@ -152,8 +141,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
/* If there's no function, patch it with a ud2a (BUG) */
ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
else if (opfunc == _paravirt_nop)
- /* If the operation is a nop, then nop the callsite */
- ret = paravirt_patch_nop();
+ ret = 0;
/* identity functions just return their single argument */
else if (opfunc == _paravirt_ident_32)
@@ -162,10 +150,6 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
ret = paravirt_patch_ident_64(insnbuf, len);
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
-#ifdef CONFIG_X86_32
- type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
-#endif
- type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
/* If operation requires a jmp, then jmp */
ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
@@ -220,8 +204,6 @@ static u64 native_steal_clock(int cpu)
/* These are in entry.S */
extern void native_iret(void);
-extern void native_irq_enable_sysexit(void);
-extern void native_usergs_sysret32(void);
extern void native_usergs_sysret64(void);
static struct resource reserve_ioports = {
@@ -379,13 +361,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.load_sp0 = native_load_sp0,
-#if defined(CONFIG_X86_32)
- .irq_enable_sysexit = native_irq_enable_sysexit,
-#endif
#ifdef CONFIG_X86_64
-#ifdef CONFIG_IA32_EMULATION
- .usergs_sysret32 = native_usergs_sysret32,
-#endif
.usergs_sysret64 = native_usergs_sysret64,
#endif
.iret = native_iret,
@@ -403,12 +379,6 @@ NOKPROBE_SYMBOL(native_get_debugreg);
NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);
-struct pv_apic_ops pv_apic_ops = {
-#ifdef CONFIG_X86_LOCAL_APIC
- .startup_ipi_hook = paravirt_nop,
-#endif
-};
-
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
/* 32-bit pagetable entries */
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32)
@@ -444,9 +414,6 @@ struct pv_mmu_ops pv_mmu_ops = {
.set_pmd = native_set_pmd,
.set_pmd_at = native_set_pmd_at,
.pte_update = paravirt_nop,
- .pte_update_defer = paravirt_nop,
- .pmd_update = paravirt_nop,
- .pmd_update_defer = paravirt_nop,
.ptep_modify_prot_start = __ptep_modify_prot_start,
.ptep_modify_prot_commit = __ptep_modify_prot_commit,
@@ -492,6 +459,5 @@ struct pv_mmu_ops pv_mmu_ops = {
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL (pv_cpu_ops);
EXPORT_SYMBOL (pv_mmu_ops);
-EXPORT_SYMBOL_GPL(pv_apic_ops);
EXPORT_SYMBOL_GPL(pv_info);
EXPORT_SYMBOL (pv_irq_ops);
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index c89f50a76e97..158dc0650d5d 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -5,7 +5,6 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
@@ -46,7 +45,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, restore_fl);
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_cpu_ops, iret);
- PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 8aa05583bc42..e70087a04cc8 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -13,9 +13,7 @@ DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
-DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit");
DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
-DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
DEF_NATIVE(, mov32, "mov %edi, %eax");
@@ -55,7 +53,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, irq_disable);
- PATCH_SITE(pv_cpu_ops, usergs_sysret32);
PATCH_SITE(pv_cpu_ops, usergs_sysret64);
PATCH_SITE(pv_cpu_ops, swapgs);
PATCH_SITE(pv_mmu_ops, read_cr2);
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 0497f719977d..833b1d329c47 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -180,13 +180,13 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl);
static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
static void get_tce_space_from_tar(void);
-static struct cal_chipset_ops calgary_chip_ops = {
+static const struct cal_chipset_ops calgary_chip_ops = {
.handle_quirks = calgary_handle_quirks,
.tce_cache_blast = calgary_tce_cache_blast,
.dump_error_regs = calgary_dump_error_regs
};
-static struct cal_chipset_ops calioc2_chip_ops = {
+static const struct cal_chipset_ops calioc2_chip_ops = {
.handle_quirks = calioc2_handle_quirks,
.tce_cache_blast = calioc2_tce_cache_blast,
.dump_error_regs = calioc2_dump_error_regs
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index adf0392d549a..7c577a178859 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -88,7 +88,7 @@ int __init pci_swiotlb_detect_4gb(void)
{
/* don't initialize swiotlb if iommu=off (no_iommu=1) */
#ifdef CONFIG_X86_64
- if (!no_iommu && max_pfn > MAX_DMA32_PFN)
+ if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN)
swiotlb = 1;
#endif
return swiotlb;
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
index 4f00b63d7ff3..14415aff1813 100644
--- a/arch/x86/kernel/pmem.c
+++ b/arch/x86/kernel/pmem.c
@@ -4,10 +4,22 @@
*/
#include <linux/platform_device.h>
#include <linux/module.h>
+#include <linux/ioport.h>
+
+static int found(u64 start, u64 end, void *data)
+{
+ return 1;
+}
static __init int register_e820_pmem(void)
{
+ char *pmem = "Persistent Memory (legacy)";
struct platform_device *pdev;
+ int rc;
+
+ rc = walk_iomem_res(pmem, IORESOURCE_MEM, 0, -1, NULL, found);
+ if (rc <= 0)
+ return 0;
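/*
 * Note (editor's sketch, not part of the patch): the walk stops as soon
 * as the callback returns nonzero, so rc > 0 here just means "at least
 * one legacy persistent-memory range exists in the resource tree" --
 * found() never inspects the range itself.
 */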
/*
* See drivers/nvdimm/e820.c for the implementation, this is
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e835d263a33b..b9d99e0f82c4 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -125,7 +125,7 @@ void release_thread(struct task_struct *dead_task)
if (dead_task->mm->context.ldt) {
pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
dead_task->comm,
- dead_task->mm->context.ldt,
+ dead_task->mm->context.ldt->entries,
dead_task->mm->context.ldt->size);
BUG();
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 558f50edebca..32e9d9cbb884 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -124,21 +124,6 @@ const char *regs_query_register_name(unsigned int offset)
return NULL;
}
-static const int arg_offs_table[] = {
-#ifdef CONFIG_X86_32
- [0] = offsetof(struct pt_regs, ax),
- [1] = offsetof(struct pt_regs, dx),
- [2] = offsetof(struct pt_regs, cx)
-#else /* CONFIG_X86_64 */
- [0] = offsetof(struct pt_regs, di),
- [1] = offsetof(struct pt_regs, si),
- [2] = offsetof(struct pt_regs, dx),
- [3] = offsetof(struct pt_regs, cx),
- [4] = offsetof(struct pt_regs, r8),
- [5] = offsetof(struct pt_regs, r9)
-#endif
-};
-
/*
* does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 2f355d229a58..99bfc025111d 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -140,27 +140,3 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}
-
-#ifdef CONFIG_X86_64
-/*
- * Initialize the generic pvclock vsyscall state. This will allocate
- * a/some page(s) for the per-vcpu pvclock information, set up a
- * fixmap mapping for the page(s)
- */
-
-int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
- int size)
-{
- int idx;
-
- WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
-
- for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
- __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
- __pa(i) + (idx*PAGE_SIZE),
- PAGE_KERNEL_VVAR);
- }
-
- return 0;
-}
-#endif
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 02693dd9a079..d64889aa2d46 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -718,6 +718,7 @@ static int crashing_cpu;
static nmi_shootdown_cb shootdown_callback;
static atomic_t waiting_for_crash_ipi;
+static int crash_ipi_issued;
static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
{
@@ -780,6 +781,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
smp_send_nmi_allbutself();
+ /* Kick CPUs looping in NMI context. */
+ WRITE_ONCE(crash_ipi_issued, 1);
+
msecs = 1000; /* Wait at most a second for the other cpus to stop */
while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
mdelay(1);
@@ -788,9 +792,35 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
/* Leave the nmi callback set */
}
+
+/*
+ * Check if the crash dumping IPI got issued and if so, call its callback
+ * directly. This function is used when we have already been in NMI handler.
+ * It doesn't return.
+ */
+void run_crash_ipi_callback(struct pt_regs *regs)
+{
+ if (crash_ipi_issued)
+ crash_nmi_callback(0, regs);
+}
+
+/* Override the weak function in kernel/panic.c */
+void nmi_panic_self_stop(struct pt_regs *regs)
+{
+ while (1) {
+ /* If no CPU is preparing a crash dump, we simply loop here. */
+ run_crash_ipi_callback(regs);
+ cpu_relax();
+ }
+}
+
#else /* !CONFIG_SMP */
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
/* No other CPUs to shoot down */
}
+
+void run_crash_ipi_callback(struct pt_regs *regs)
+{
+}
#endif
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index cd9685235df9..4af8d063fb36 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -200,6 +200,9 @@ static __init int add_rtc_cmos(void)
}
#endif
+ if (paravirt_enabled() && !paravirt_has(RTC))
+ return -ENODEV;
+
platform_device_register(&rtc_device);
dev_info(&rtc_device.dev,
"registered platform RTC device (no PNP device found)\n");
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 29db25f9a745..d3d80e6d42a2 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1048,6 +1048,8 @@ void __init setup_arch(char **cmdline_p)
if (mtrr_trim_uncached_memory(max_pfn))
max_pfn = e820_end_of_ram_pfn();
+ max_possible_pfn = max_pfn;
+
#ifdef CONFIG_X86_32
/* max_low_pfn get updated here */
find_low_pfn_range();
@@ -1250,8 +1252,6 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled(EFI_BOOT))
efi_apply_memmap_quirks();
#endif
-
- microcode_init();
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b7ffb7c00075..cb6282c3638f 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -690,12 +690,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
signal_setup_done(failed, ksig, stepping);
}
-#ifdef CONFIG_X86_32
-#define NR_restart_syscall __NR_restart_syscall
-#else /* !CONFIG_X86_32 */
-#define NR_restart_syscall \
- test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
-#endif /* CONFIG_X86_32 */
+static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
+{
+#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64)
+ return __NR_restart_syscall;
+#else /* !CONFIG_X86_32 && CONFIG_X86_64 */
+ return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall :
+ __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
+#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */
+}
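/*
 * Example (editor's note, not part of the patch): an x32 task enters the
 * kernel with __X32_SYSCALL_BIT (0x40000000) set in orig_ax, so the OR
 * above restarts the syscall as an x32 one again; ia32 tasks take the
 * TIF_IA32 branch instead, and native 64-bit tasks OR in zero.
 */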
/*
* Note that 'init' is a special process: it doesn't get signals it doesn't
@@ -724,7 +727,7 @@ void do_signal(struct pt_regs *regs)
break;
case -ERESTART_RESTARTBLOCK:
- regs->ax = NR_restart_syscall;
+ regs->ax = get_nr_restart_syscall(regs);
regs->ip -= 2;
break;
}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 12c8286206ce..658777cf3851 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -125,12 +125,12 @@ static void native_smp_send_reschedule(int cpu)
WARN_ON(1);
return;
}
- apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
+ apic->send_IPI(cpu, RESCHEDULE_VECTOR);
}
void native_send_call_func_single_ipi(int cpu)
{
- apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+ apic->send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR);
}
void native_send_call_func_ipi(const struct cpumask *mask)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 892ee2e5ecbc..24d57f77b3c1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -304,7 +304,7 @@ do { \
static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
- if (cpu_has_topoext) {
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
if (c->phys_proc_id == o->phys_proc_id &&
@@ -509,7 +509,7 @@ void __inquire_remote_apic(int apicid)
*/
#define UDELAY_10MS_DEFAULT 10000
-static unsigned int init_udelay = INT_MAX;
+static unsigned int init_udelay = UINT_MAX;
static int __init cpu_init_udelay(char *str)
{
@@ -522,14 +522,15 @@ early_param("cpu_init_udelay", cpu_init_udelay);
static void __init smp_quirk_init_udelay(void)
{
/* if cmdline changed it from default, leave it alone */
- if (init_udelay != INT_MAX)
+ if (init_udelay != UINT_MAX)
return;
/* if modern processor, use no delay */
if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
- ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF)))
+ ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) {
init_udelay = 0;
-
+ return;
+ }
/* else, use legacy delay */
init_udelay = UDELAY_10MS_DEFAULT;
}
@@ -629,13 +630,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
num_starts = 0;
/*
- * Paravirt / VMI wants a startup IPI hook here to set up the
- * target processor state.
- */
- startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
- stack_start);
-
- /*
* Run STARTUP IPI loop.
*/
pr_debug("#startup loops: %d\n", num_starts);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c7c4d9c51e99..3d743da828d3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1185,8 +1185,6 @@ void __init tsc_init(void)
u64 lpj;
int cpu;
- x86_init.timers.tsc_pre_init();
-
if (!cpu_has_tsc) {
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 524619351961..483231ebbb0b 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -357,8 +357,10 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
tss = &per_cpu(cpu_tss, get_cpu());
/* make room for real-mode segments */
tsk->thread.sp0 += 16;
- if (cpu_has_sep)
+
+ if (static_cpu_has_safe(X86_FEATURE_SEP))
tsk->thread.sysenter_cs = 0;
+
load_sp0(tss, &tsk->thread);
put_cpu();
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 3839628d962e..dad5fe9633a3 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -68,7 +68,6 @@ struct x86_init_ops x86_init __initdata = {
.timers = {
.setup_percpu_clockev = setup_boot_APIC_clock,
- .tsc_pre_init = x86_init_noop,
.timer_init = hpet_time_init,
.wallclock_init = x86_init_noop,
},
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 06332cb7e7d1..c8eda1498121 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -2,6 +2,7 @@
#define ARCH_X86_KVM_CPUID_H
#include "x86.h"
+#include <asm/cpu.h>
int kvm_update_cpuid(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
@@ -38,6 +39,14 @@ static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
return best && (best->ecx & bit(X86_FEATURE_XSAVE));
}
+static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 1, 0);
+ return best && (best->edx & bit(X86_FEATURE_MTRR));
+}
+
static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -170,4 +179,37 @@ static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu)
}
#undef BIT_NRIPS
+static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ if (!best)
+ return -1;
+
+ return x86_family(best->eax);
+}
+
+static inline int guest_cpuid_model(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ if (!best)
+ return -1;
+
+ return x86_model(best->eax);
+}
+
+static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ if (!best)
+ return -1;
+
+ return x86_stepping(best->eax);
+}
+
#endif
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 08116ff227cc..b0ea42b78ccd 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -420,6 +420,7 @@ void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_s
u8 saved_mode;
if (hpet_legacy_start) {
/* save existing mode for later reenablement */
+ WARN_ON(channel != 0);
saved_mode = kvm->arch.vpit->pit_state.channels[0].mode;
kvm->arch.vpit->pit_state.channels[0].mode = 0xff; /* disable timer */
pit_load_count(kvm, channel, val);
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 9e8bf13572e6..3f8c732117ec 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -120,14 +120,22 @@ static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state)
return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK;
}
-static u8 mtrr_disabled_type(void)
+static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu)
{
/*
* Intel SDM 11.11.2.2: all MTRRs are disabled when
* IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC
* memory type is applied to all of physical memory.
+ *
+ * However, virtual machines can be run with CPUID such that
+ * there are no MTRRs. In that case, the firmware will never
+ * enable MTRRs, and it is undesirable to run the guest
+ * entirely with UC memory, so we use WB instead.
*/
- return MTRR_TYPE_UNCACHABLE;
+ if (guest_cpuid_has_mtrr(vcpu))
+ return MTRR_TYPE_UNCACHABLE;
+ else
+ return MTRR_TYPE_WRBACK;
}
/*
@@ -267,7 +275,7 @@ static int fixed_mtrr_addr_to_seg(u64 addr)
for (seg = 0; seg < seg_num; seg++) {
mtrr_seg = &fixed_seg_table[seg];
- if (mtrr_seg->start >= addr && addr < mtrr_seg->end)
+ if (mtrr_seg->start <= addr && addr < mtrr_seg->end)
return seg;
}
@@ -300,7 +308,6 @@ static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end)
*start = range->base & PAGE_MASK;
mask = range->mask & PAGE_MASK;
- mask |= ~0ULL << boot_cpu_data.x86_phys_bits;
/* This cannot overflow because writing to the reserved bits of
* variable MTRRs causes a #GP.
@@ -356,10 +363,14 @@ static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
if (var_mtrr_range_is_valid(cur))
list_del(&mtrr_state->var_ranges[index].node);
+ /* Extend the mask with all 1 bits above the guest's physical
+ * address width, since those bits must implicitly be 0; they
+ * are cleared again when the MSR is read.
+ */
if (!is_mtrr_mask)
cur->base = data;
else
- cur->mask = data;
+ cur->mask = data | (-1LL << cpuid_maxphyaddr(vcpu));
/* add it to the list if it's enabled. */
if (var_mtrr_range_is_valid(cur)) {
@@ -426,6 +437,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
*pdata = vcpu->arch.mtrr_state.var_ranges[index].base;
else
*pdata = vcpu->arch.mtrr_state.var_ranges[index].mask;
+
+ *pdata &= (1ULL << cpuid_maxphyaddr(vcpu)) - 1;
}
return 0;
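/*
 * Worked example (editor's note, not part of the patch), assuming a
 * hypothetical guest with cpuid_maxphyaddr() == 36:
 *
 *	guest writes mask:                    0x0000000ffff00000
 *	stored:  data | (-1LL << 36)       == 0xfffffffffff00000
 *	read:    stored & ((1ULL << 36)-1) == 0x0000000ffff00000
 *
 * so the implicit 1s above bit 35 never leak back to the guest.
 */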
@@ -670,7 +683,7 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
}
if (iter.mtrr_disabled)
- return mtrr_disabled_type();
+ return mtrr_disabled_type(vcpu);
/* not contained in any MTRRs. */
if (type == -1)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 83a1c643f9a5..22aef20bf9c7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3053,6 +3053,23 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_UCODE_REV:
msr_info->data = 0x01000065;
break;
+ case MSR_F15H_IC_CFG: {
+
+ int family, model;
+
+ family = guest_cpuid_family(vcpu);
+ model = guest_cpuid_model(vcpu);
+
+ if (family < 0 || model < 0)
+ return kvm_get_msr_common(vcpu, msr_info);
+
+ msr_info->data = 0;
+
+ if (family == 0x15 &&
+ (model >= 0x2 && model < 0x20))
+ msr_info->data = 0x1E;
+ }
+ break;
default:
return kvm_get_msr_common(vcpu, msr_info);
}
@@ -3422,6 +3439,8 @@ static int handle_exit(struct kvm_vcpu *vcpu)
struct kvm_run *kvm_run = vcpu->run;
u32 exit_code = svm->vmcb->control.exit_code;
+ trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
+
if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
vcpu->arch.cr0 = svm->vmcb->save.cr0;
if (npt_enabled)
@@ -3892,8 +3911,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
- trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM);
-
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_handle_nmi(&svm->vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 87acc5221740..44976a596fa6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2803,7 +2803,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.ia32_xss;
break;
case MSR_TSC_AUX:
- if (!guest_cpuid_has_rdtscp(vcpu))
+ if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated)
return 1;
/* Otherwise falls through */
default:
@@ -2909,7 +2909,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
break;
case MSR_TSC_AUX:
- if (!guest_cpuid_has_rdtscp(vcpu))
+ if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated)
return 1;
/* Check reserved bit, higher 32 bits should be zero */
if ((data >> 32) != 0)
@@ -7394,11 +7394,6 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
switch (type) {
case VMX_VPID_EXTENT_ALL_CONTEXT:
- if (get_vmcs12(vcpu)->virtual_processor_id == 0) {
- nested_vmx_failValid(vcpu,
- VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
- return 1;
- }
__vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
nested_vmx_succeed(vcpu);
break;
@@ -8047,6 +8042,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
u32 exit_reason = vmx->exit_reason;
u32 vectoring_info = vmx->idt_vectoring_info;
+ trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
+
/*
* Flush logged GPAs PML buffer, this will make dirty_bitmap more
* updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before
@@ -8673,7 +8670,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->loaded_vmcs->launched = 1;
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
- trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
/*
* the KVM_REQ_EVENT optimization bit is only on for one entry, and if
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00462bd63129..97592e190413 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2763,6 +2763,26 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
return 0;
}
+static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
+{
+ return (!lapic_in_kernel(vcpu) ||
+ kvm_apic_accept_pic_intr(vcpu));
+}
+
+/*
+ * If userspace requested an interrupt window, check that the
+ * interrupt window is open.
+ *
+ * No need to exit to userspace if we already have an interrupt queued.
+ */
+static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
+{
+ return kvm_arch_interrupt_allowed(vcpu) &&
+ !kvm_cpu_has_interrupt(vcpu) &&
+ !kvm_event_needs_reinjection(vcpu) &&
+ kvm_cpu_accept_dm_intr(vcpu);
+}
+
static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
@@ -2786,6 +2806,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
return -EEXIST;
vcpu->arch.pending_external_vector = irq->irq;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
return 0;
}
@@ -3551,9 +3572,11 @@ static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
{
+ int i;
mutex_lock(&kvm->arch.vpit->pit_state.lock);
memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
- kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
+ for (i = 0; i < 3; i++)
+ kvm_pit_load_count(kvm, i, ps->channels[i].count, 0);
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
return 0;
}
@@ -3572,6 +3595,7 @@ static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
{
int start = 0;
+ int i;
u32 prev_legacy, cur_legacy;
mutex_lock(&kvm->arch.vpit->pit_state.lock);
prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
@@ -3581,7 +3605,9 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
sizeof(kvm->arch.vpit->pit_state.channels));
kvm->arch.vpit->pit_state.flags = ps->flags;
- kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
+ for (i = 0; i < 3; i++)
+ kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count,
+ start && i == 0);
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
return 0;
}
@@ -5910,23 +5936,10 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
}
-/*
- * Check if userspace requested an interrupt window, and that the
- * interrupt window is open.
- *
- * No need to exit to userspace if we already have an interrupt queued.
- */
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
{
- if (!vcpu->run->request_interrupt_window || pic_in_kernel(vcpu->kvm))
- return false;
-
- if (kvm_cpu_has_interrupt(vcpu))
- return false;
-
- return (irqchip_split(vcpu->kvm)
- ? kvm_apic_accept_pic_intr(vcpu)
- : kvm_arch_interrupt_allowed(vcpu));
+ return vcpu->run->request_interrupt_window &&
+ likely(!pic_in_kernel(vcpu->kvm));
}
static void post_kvm_run_save(struct kvm_vcpu *vcpu)
@@ -5937,17 +5950,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
- if (!irqchip_in_kernel(vcpu->kvm))
- kvm_run->ready_for_interrupt_injection =
- kvm_arch_interrupt_allowed(vcpu) &&
- !kvm_cpu_has_interrupt(vcpu) &&
- !kvm_event_needs_reinjection(vcpu);
- else if (!pic_in_kernel(vcpu->kvm))
- kvm_run->ready_for_interrupt_injection =
- kvm_apic_accept_pic_intr(vcpu) &&
- !kvm_cpu_has_interrupt(vcpu);
- else
- kvm_run->ready_for_interrupt_injection = 1;
+ kvm_run->ready_for_interrupt_injection =
+ pic_in_kernel(vcpu->kvm) ||
+ kvm_vcpu_ready_for_interrupt_injection(vcpu);
}
static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -6360,8 +6365,10 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
int r;
- bool req_int_win = !lapic_in_kernel(vcpu) &&
- vcpu->run->request_interrupt_window;
+ bool req_int_win =
+ dm_request_for_irq_injection(vcpu) &&
+ kvm_cpu_accept_dm_intr(vcpu);
+
bool req_immediate_exit = false;
if (vcpu->requests) {
@@ -6513,6 +6520,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (req_immediate_exit)
smp_send_reschedule(vcpu->cpu);
+ trace_kvm_entry(vcpu->vcpu_id);
+ wait_lapic_expire(vcpu);
__kvm_guest_enter();
if (unlikely(vcpu->arch.switch_db_regs)) {
@@ -6525,8 +6534,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
}
- trace_kvm_entry(vcpu->vcpu_id);
- wait_lapic_expire(vcpu);
kvm_x86_ops->run(vcpu);
/*
@@ -6663,7 +6670,8 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
if (kvm_cpu_has_pending_timer(vcpu))
kvm_inject_pending_timer_irqs(vcpu);
- if (dm_request_for_irq_injection(vcpu)) {
+ if (dm_request_for_irq_injection(vcpu) &&
+ kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
r = 0;
vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
++vcpu->stat.request_irq_exits;
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a0d09f6c6533..4ba229ac3f4f 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1414,6 +1414,7 @@ __init void lguest_init(void)
pv_info.kernel_rpl = 1;
/* Everyone except Xen runs with this set. */
pv_info.shared_kernel_pmd = 1;
+ pv_info.features = 0;
/*
* We set up all the lguest overrides for sensitive operations. These
@@ -1472,7 +1473,6 @@ __init void lguest_init(void)
pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
pv_mmu_ops.lazy_mode.flush = paravirt_flush_lazy_mmu;
pv_mmu_ops.pte_update = lguest_pte_update;
- pv_mmu_ops.pte_update_defer = lguest_pte_update;
#ifdef CONFIG_X86_LOCAL_APIC
/* APIC read/write intercepts */
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index f2587888d987..a501fa25da41 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -16,7 +16,7 @@ clean-files := inat-tables.c
obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
-lib-y := delay.o misc.o cmdline.o
+lib-y := delay.o misc.o cmdline.o cpu.o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c
new file mode 100644
index 000000000000..aa417a97511c
--- /dev/null
+++ b/arch/x86/lib/cpu.c
@@ -0,0 +1,35 @@
+#include <linux/module.h>
+
+unsigned int x86_family(unsigned int sig)
+{
+ unsigned int x86;
+
+ x86 = (sig >> 8) & 0xf;
+
+ if (x86 == 0xf)
+ x86 += (sig >> 20) & 0xff;
+
+ return x86;
+}
+EXPORT_SYMBOL_GPL(x86_family);
+
+unsigned int x86_model(unsigned int sig)
+{
+ unsigned int fam, model;
+
+ fam = x86_family(sig);
+
+ model = (sig >> 4) & 0xf;
+
+ if (fam >= 0x6)
+ model += ((sig >> 16) & 0xf) << 4;
+
+ return model;
+}
+EXPORT_SYMBOL_GPL(x86_model);
+
+unsigned int x86_stepping(unsigned int sig)
+{
+ return sig & 0xf;
+}
+EXPORT_SYMBOL_GPL(x86_stepping);
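These helpers decode a raw CPUID signature (leaf 1, EAX) into family, model and stepping, folding in the extended-family and extended-model fields. As a worked example outside the patch — a minimal userspace sketch restating the same bit-twiddling — the Broadwell signature 0x306d4 decodes to family 0x6, model 0x3d, stepping 0x4:

#include <stdio.h>

/* Userspace restatement of x86_family()/x86_model()/x86_stepping(),
 * for illustration only. */
static unsigned int family(unsigned int sig)
{
	unsigned int x86 = (sig >> 8) & 0xf;

	return (x86 == 0xf) ? x86 + ((sig >> 20) & 0xff) : x86;
}

static unsigned int model(unsigned int sig)
{
	unsigned int m = (sig >> 4) & 0xf;

	if (family(sig) >= 0x6)
		m += ((sig >> 16) & 0xf) << 4;
	return m;
}

int main(void)
{
	unsigned int sig = 0x306d4;	/* a Broadwell CPUID signature */

	printf("family 0x%x, model 0x%x, stepping 0x%x\n",
	       family(sig), model(sig), sig & 0xf);
	return 0;
}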
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index 43623739c7cf..004c861b1648 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -1,6 +1,8 @@
#include <linux/module.h>
#include <linux/preempt.h>
#include <asm/msr.h>
+#define CREATE_TRACE_POINTS
+#include <asm/msr-trace.h>
struct msr *msrs_alloc(void)
{
@@ -108,3 +110,27 @@ int msr_clear_bit(u32 msr, u8 bit)
{
return __flip_bit(msr, bit, false);
}
+
+#ifdef CONFIG_TRACEPOINTS
+void do_trace_write_msr(unsigned msr, u64 val, int failed)
+{
+ trace_write_msr(msr, val, failed);
+}
+EXPORT_SYMBOL(do_trace_write_msr);
+EXPORT_TRACEPOINT_SYMBOL(write_msr);
+
+void do_trace_read_msr(unsigned msr, u64 val, int failed)
+{
+ trace_read_msr(msr, val, failed);
+}
+EXPORT_SYMBOL(do_trace_read_msr);
+EXPORT_TRACEPOINT_SYMBOL(read_msr);
+
+void do_trace_rdpmc(unsigned counter, u64 val, int failed)
+{
+ trace_rdpmc(counter, val, failed);
+}
+EXPORT_SYMBOL(do_trace_rdpmc);
+EXPORT_TRACEPOINT_SYMBOL(rdpmc);
+
+#endif
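These exported do_trace_*() thunks let the inline MSR accessors in the headers fire tracepoints without dragging the tracepoint machinery into every caller. A hedged sketch of the caller side — the enable flag below stands in for whatever static-key guard the real header uses, and is purely illustrative:

/* Sketch only; not the real header code. */
static bool write_msr_tracing_on;	/* placeholder for the static key */

static inline void wrmsr_traced(unsigned int msr, u32 low, u32 high)
{
	wrmsr(msr, low, high);
	if (write_msr_tracing_on)
		do_trace_write_msr(msr, ((u64)high << 32) | low, 0);
}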
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 65c47fda26fc..f9d38a48e3c8 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o
+obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
new file mode 100644
index 000000000000..bfcffdf6c577
--- /dev/null
+++ b/arch/x86/mm/debug_pagetables.c
@@ -0,0 +1,46 @@
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <asm/pgtable.h>
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ ptdump_walk_pgd_level(m, NULL);
+ return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, ptdump_show, NULL);
+}
+
+static const struct file_operations ptdump_fops = {
+ .owner = THIS_MODULE,
+ .open = ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static struct dentry *pe;
+
+static int __init pt_dump_debug_init(void)
+{
+ pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
+ &ptdump_fops);
+ if (!pe)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void __exit pt_dump_debug_exit(void)
+{
+ debugfs_remove_recursive(pe);
+}
+
+module_init(pt_dump_debug_init);
+module_exit(pt_dump_debug_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
+MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index a035c2aa7801..4a6f1d9b5106 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -89,7 +89,7 @@ static struct addr_marker address_markers[] = {
{ 0/* VMALLOC_START */, "vmalloc() Area" },
{ 0/*VMALLOC_END*/, "vmalloc() End" },
# ifdef CONFIG_HIGHMEM
- { 0/*PKMAP_BASE*/, "Persisent kmap() Area" },
+ { 0/*PKMAP_BASE*/, "Persistent kmap() Area" },
# endif
{ 0/*FIXADDR_START*/, "Fixmap Area" },
#endif
@@ -426,38 +426,15 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
{
ptdump_walk_pgd_level_core(m, pgd, false);
}
+EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
void ptdump_walk_pgd_level_checkwx(void)
{
ptdump_walk_pgd_level_core(NULL, NULL, true);
}
-#ifdef CONFIG_X86_PTDUMP
-static int ptdump_show(struct seq_file *m, void *v)
+static int __init pt_dump_init(void)
{
- ptdump_walk_pgd_level(m, NULL);
- return 0;
-}
-
-static int ptdump_open(struct inode *inode, struct file *filp)
-{
- return single_open(filp, ptdump_show, NULL);
-}
-
-static const struct file_operations ptdump_fops = {
- .open = ptdump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif
-
-static int pt_dump_init(void)
-{
-#ifdef CONFIG_X86_PTDUMP
- struct dentry *pe;
-#endif
-
#ifdef CONFIG_X86_32
/* Not a compile-time constant on x86-32 */
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
@@ -468,13 +445,6 @@ static int pt_dump_init(void)
address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
#endif
-#ifdef CONFIG_X86_PTDUMP
- pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
- &ptdump_fops);
- if (!pe)
- return -ENOMEM;
-#endif
-
return 0;
}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index b9c78f3bcd67..0d8d53d1f5cc 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -194,8 +194,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
* Check if the request spans more than any BAR in the iomem resource
* tree.
*/
- WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size),
- KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
+ if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
+ pr_warn("caller %pS mapping multiple BARs\n", caller);
return ret_addr;
err_free_area:
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index b0ae85f90f10..b2fd67da1701 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -101,19 +101,19 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
switch (type) {
case REG_TYPE_RM:
regno = X86_MODRM_RM(insn->modrm.value);
- if (X86_REX_B(insn->rex_prefix.value) == 1)
+ if (X86_REX_B(insn->rex_prefix.value))
regno += 8;
break;
case REG_TYPE_INDEX:
regno = X86_SIB_INDEX(insn->sib.value);
- if (X86_REX_X(insn->rex_prefix.value) == 1)
+ if (X86_REX_X(insn->rex_prefix.value))
regno += 8;
break;
case REG_TYPE_BASE:
regno = X86_SIB_BASE(insn->sib.value);
- if (X86_REX_B(insn->rex_prefix.value) == 1)
+ if (X86_REX_B(insn->rex_prefix.value))
regno += 8;
break;
@@ -586,6 +586,29 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
}
/*
+ * We only want to do a 4-byte get_user() on a 32-bit mm. Otherwise,
+ * we might run off the end of the bounds table if we are on
+ * a 64-bit kernel and try to get 8 bytes.
+ */
+int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
+ long __user *bd_entry_ptr)
+{
+ u32 bd_entry_32;
+ int ret;
+
+ if (is_64bit_mm(mm))
+ return get_user(*bd_entry_ret, bd_entry_ptr);
+
+ /*
+ * Note that get_user() uses the type of the *pointer* to
+ * establish the size of the get, not the destination.
+ */
+ ret = get_user(bd_entry_32, (u32 __user *)bd_entry_ptr);
+ *bd_entry_ret = bd_entry_32;
+ return ret;
+}
+
+/*
* Get the base of bounds tables pointed by specific bounds
* directory entry.
*/
@@ -605,7 +628,7 @@ static int get_bt_addr(struct mm_struct *mm,
int need_write = 0;
pagefault_disable();
- ret = get_user(bd_entry, bd_entry_ptr);
+ ret = get_user_bd_entry(mm, &bd_entry, bd_entry_ptr);
pagefault_enable();
if (!ret)
break;
@@ -700,11 +723,23 @@ static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm,
*/
static inline unsigned long bd_entry_virt_space(struct mm_struct *mm)
{
- unsigned long long virt_space = (1ULL << boot_cpu_data.x86_virt_bits);
- if (is_64bit_mm(mm))
- return virt_space / MPX_BD_NR_ENTRIES_64;
- else
- return virt_space / MPX_BD_NR_ENTRIES_32;
+ unsigned long long virt_space;
+ unsigned long long GB = (1ULL << 30);
+
+ /*
+ * This covers 32-bit emulation as well as 32-bit kernels
+ * running on 64-bit hardware.
+ */
+ if (!is_64bit_mm(mm))
+ return (4ULL * GB) / MPX_BD_NR_ENTRIES_32;
+
+ /*
+ * 'x86_virt_bits' returns what the hardware is capable
+ * of, and returns the full >32-bit address space when
+ * running 32-bit kernels on 64-bit hardware.
+ */
+ virt_space = (1ULL << boot_cpu_data.x86_virt_bits);
+ return virt_space / MPX_BD_NR_ENTRIES_64;
}
/*
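The comment in get_user_bd_entry() is the key point: get_user() derives its access width from the pointer's pointee type, not from the destination variable. A userspace analogue (memcpy standing in for the uaccess machinery) makes that visible:

#include <stdio.h>
#include <string.h>

/* Copy sizeof(*(ptr)) bytes -- the pointee type picks the width,
 * just as it does for get_user(). */
#define GET(dst, ptr)	memcpy(&(dst), (ptr), sizeof(*(ptr)))

int main(void)
{
	unsigned char buf[8] = { 0x11, 0x22, 0x33, 0x44,
				 0x55, 0x66, 0x77, 0x88 };
	unsigned long long out = 0;

	GET(out, (unsigned int *)buf);	/* reads 4 bytes, not 8 */
	printf("0x%llx\n", out);	/* 0x44332211 on little-endian */
	return 0;
}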
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a3137a4feed1..6000ad7f560c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -129,14 +129,16 @@ within(unsigned long addr, unsigned long start, unsigned long end)
*/
void clflush_cache_range(void *vaddr, unsigned int size)
{
- unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
+ const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
+ void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
void *vend = vaddr + size;
- void *p;
+
+ if (p >= vend)
+ return;
mb();
- for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
- p < vend; p += boot_cpu_data.x86_clflush_size)
+ for (; p < vend; p += clflush_size)
clflushopt(p);
mb();
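The rewritten clflush_cache_range() rounds the start address down to a cache-line boundary up front, and the new p >= vend test returns before the mb() pair when the range is empty. A small standalone model of the arithmetic, assuming a 64-byte line:

#include <stdio.h>

/* Models the line walk in clflush_cache_range() with clflush_size = 64. */
static void show_lines(unsigned long vaddr, unsigned long size)
{
	unsigned long p = vaddr & ~(64UL - 1);	/* round down to the line */
	unsigned long vend = vaddr + size;

	if (p >= vend) {	/* empty range: skip the barriers entirely */
		printf("[%#lx,+%lu): nothing to do\n", vaddr, size);
		return;
	}
	for (; p < vend; p += 64)
		printf("[%#lx,+%lu): flush line %#lx\n", vaddr, size, p);
}

int main(void)
{
	show_lines(0x1003, 1);	/* flushes the single line at 0x1000 */
	show_lines(0x1040, 0);	/* empty: returns before any work */
	return 0;
}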
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 188e3e07eeeb..031782e74231 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -586,7 +586,7 @@ int free_memtype(u64 start, u64 end)
entry = rbt_memtype_erase(start, end);
spin_unlock(&memtype_lock);
- if (!entry) {
+ if (IS_ERR(entry)) {
pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid, start, end - 1);
return -EINVAL;
@@ -992,6 +992,16 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
vma->vm_flags &= ~VM_PAT;
}
+/*
+ * untrack_pfn_moved is called while mremapping a pfnmap for a new region,
+ * with the old vma after its pfnmap page table has been removed. The new
+ * vma has a new pfnmap to the same pfn & cache type with VM_PAT set.
+ */
+void untrack_pfn_moved(struct vm_area_struct *vma)
+{
+ vma->vm_flags &= ~VM_PAT;
+}
+
pgprot_t pgprot_writecombine(pgprot_t prot)
{
return __pgprot(pgprot_val(prot) |
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 63931080366a..2f7702253ccf 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -98,8 +98,13 @@ static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
return last_lower; /* Returns NULL if there is no overlap */
}
-static struct memtype *memtype_rb_exact_match(struct rb_root *root,
- u64 start, u64 end)
+enum {
+ MEMTYPE_EXACT_MATCH = 0,
+ MEMTYPE_END_MATCH = 1
+};
+
+static struct memtype *memtype_rb_match(struct rb_root *root,
+ u64 start, u64 end, int match_type)
{
struct memtype *match;
@@ -107,7 +112,12 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root,
while (match != NULL && match->start < end) {
struct rb_node *node;
- if (match->start == start && match->end == end)
+ if ((match_type == MEMTYPE_EXACT_MATCH) &&
+ (match->start == start) && (match->end == end))
+ return match;
+
+ if ((match_type == MEMTYPE_END_MATCH) &&
+ (match->start < start) && (match->end == end))
return match;
node = rb_next(&match->rb);
@@ -117,7 +127,7 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root,
match = NULL;
}
- return NULL; /* Returns NULL if there is no exact match */
+ return NULL; /* Returns NULL if there is no match */
}
static int memtype_rb_check_conflict(struct rb_root *root,
@@ -210,12 +220,36 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)
{
struct memtype *data;
- data = memtype_rb_exact_match(&memtype_rbroot, start, end);
- if (!data)
- goto out;
+ /*
+ * Since the memtype_rbroot tree allows overlapping ranges,
+ * rbt_memtype_erase() checks with EXACT_MATCH first, i.e. freeing
+ * a whole node for the munmap case. If no such entry is found,
+ * it then checks with END_MATCH, i.e. shrinking the size of a node
+ * from the end for the mremap case.
+ */
+ data = memtype_rb_match(&memtype_rbroot, start, end,
+ MEMTYPE_EXACT_MATCH);
+ if (!data) {
+ data = memtype_rb_match(&memtype_rbroot, start, end,
+ MEMTYPE_END_MATCH);
+ if (!data)
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (data->start == start) {
+ /* munmap: erase this node */
+ rb_erase_augmented(&data->rb, &memtype_rbroot,
+ &memtype_rb_augment_cb);
+ } else {
+ /* mremap: update the end value of this node */
+ rb_erase_augmented(&data->rb, &memtype_rbroot,
+ &memtype_rb_augment_cb);
+ data->end = start;
+ data->subtree_max_end = data->end;
+ memtype_rb_insert(&memtype_rbroot, data);
+ return NULL;
+ }
- rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb);
-out:
return data;
}
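The new matching rules give rbt_memtype_erase() three outcomes; an illustrative fragment with invented addresses, assuming one tracked node covering [0x1000, 0x3000):

/* Illustration only -- addresses are invented. */
data = rbt_memtype_erase(0x1000, 0x3000); /* EXACT_MATCH: node erased (munmap) */
data = rbt_memtype_erase(0x2000, 0x3000); /* END_MATCH: node shrunk to
					   * [0x1000, 0x2000), returns NULL */
data = rbt_memtype_erase(0x1800, 0x2800); /* no match: ERR_PTR(-EINVAL) */

The ERR_PTR return is why the free_memtype() hunk above switches its check from !entry to IS_ERR(entry).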
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index fb0a9dd1d6e4..ee9c2e3a7199 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -414,7 +414,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
if (changed && dirty) {
*ptep = entry;
- pte_update_defer(vma->vm_mm, address, ptep);
+ pte_update(vma->vm_mm, address, ptep);
}
return changed;
@@ -431,7 +431,6 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
if (changed && dirty) {
*pmdp = entry;
- pmd_update_defer(vma->vm_mm, address, pmdp);
/*
* We had a write-protection fault here and changed the pmd
 * to be more permissive. No need to flush the TLB for that,
@@ -469,9 +468,6 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
(unsigned long *)pmdp);
- if (ret)
- pmd_update(vma->vm_mm, addr, pmdp);
-
return ret;
}
#endif
@@ -518,7 +514,6 @@ void pmdp_splitting_flush(struct vm_area_struct *vma,
set = !test_and_set_bit(_PAGE_BIT_SPLITTING,
(unsigned long *)pmdp);
if (set) {
- pmd_update(vma->vm_mm, address, pmdp);
/* need tlb flush only to serialize against gup-fast */
flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 90555bf60aa4..92e2eacb3321 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -31,7 +31,7 @@ early_param("noexec", noexec_setup);
void x86_configure_nx(void)
{
- if (cpu_has_nx && !disable_nx)
+ if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx)
__supported_pte_mask |= _PAGE_NX;
else
__supported_pte_mask &= ~_PAGE_NX;
@@ -39,7 +39,7 @@ void x86_configure_nx(void)
void __init x86_report_nx(void)
{
- if (!cpu_has_nx) {
+ if (!boot_cpu_has(X86_FEATURE_NX)) {
printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
"missing in CPU!\n");
} else {
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index c2aea63bee20..b5f821881465 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -203,6 +203,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
(unsigned long long)start, (unsigned long long)end - 1);
+ max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
+
return 0;
out_err_bad_srat:
bad_srat();
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index 7bcf06a7cd12..6eb3c8af96e2 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -50,18 +50,9 @@ void x86_pci_root_bus_resources(int bus, struct list_head *resources)
if (!found)
pci_add_resource(resources, &info->busn);
- list_for_each_entry(root_res, &info->resources, list) {
- struct resource *res;
- struct resource *root;
+ list_for_each_entry(root_res, &info->resources, list)
+ pci_add_resource(resources, &root_res->res);
- res = &root_res->res;
- pci_add_resource(resources, res);
- if (res->flags & IORESOURCE_IO)
- root = &ioport_resource;
- else
- root = &iomem_resource;
- insert_resource(root, res);
- }
return;
default_resources:
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 327f21c3bde1..8dd80050d705 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -28,6 +28,7 @@
#include <linux/nmi.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/clocksource.h>
#include <asm/apic.h>
#include <asm/current.h>
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 9ab52791fed5..d5f64996394a 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -23,6 +23,7 @@
#include <asm/debugreg.h>
#include <asm/cpu.h>
#include <asm/mmu_context.h>
+#include <linux/dmi.h>
#ifdef CONFIG_X86_32
__visible unsigned long saved_context_ebx;
@@ -32,6 +33,29 @@ __visible unsigned long saved_context_eflags;
#endif
struct saved_context saved_context;
+static void msr_save_context(struct saved_context *ctxt)
+{
+ struct saved_msr *msr = ctxt->saved_msrs.array;
+ struct saved_msr *end = msr + ctxt->saved_msrs.num;
+
+ while (msr < end) {
+ msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q);
+ msr++;
+ }
+}
+
+static void msr_restore_context(struct saved_context *ctxt)
+{
+ struct saved_msr *msr = ctxt->saved_msrs.array;
+ struct saved_msr *end = msr + ctxt->saved_msrs.num;
+
+ while (msr < end) {
+ if (msr->valid)
+ wrmsrl(msr->info.msr_no, msr->info.reg.q);
+ msr++;
+ }
+}
+
/**
* __save_processor_state - save CPU registers before creating a
* hibernation image and before restoring the memory state from it
@@ -111,6 +135,7 @@ static void __save_processor_state(struct saved_context *ctxt)
#endif
ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE,
&ctxt->misc_enable);
+ msr_save_context(ctxt);
}
/* Needed by apm.c */
@@ -229,6 +254,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
x86_platform.restore_sched_clock_state();
mtrr_bp_restore();
perf_restore_debug_store();
+ msr_restore_context(ctxt);
}
/* Needed by apm.c */
@@ -320,3 +346,69 @@ static int __init bsp_pm_check_init(void)
}
core_initcall(bsp_pm_check_init);
+
+static int msr_init_context(const u32 *msr_id, const int total_num)
+{
+ int i = 0;
+ struct saved_msr *msr_array;
+
+ if (saved_context.saved_msrs.array || saved_context.saved_msrs.num > 0) {
+ pr_err("x86/pm: MSR quirk already applied, please check your DMI match table.\n");
+ return -EINVAL;
+ }
+
+ msr_array = kmalloc_array(total_num, sizeof(struct saved_msr), GFP_KERNEL);
+ if (!msr_array) {
+ pr_err("x86/pm: Can not allocate memory to save/restore MSRs during suspend.\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < total_num; i++) {
+ msr_array[i].info.msr_no = msr_id[i];
+ msr_array[i].valid = false;
+ msr_array[i].info.reg.q = 0;
+ }
+ saved_context.saved_msrs.num = total_num;
+ saved_context.saved_msrs.array = msr_array;
+
+ return 0;
+}
+
+/*
+ * The following section is a quirk framework for problematic BIOSen:
+ * Sometimes MSRs are modified by the BIOSen after suspend to
+ * RAM; this might cause unexpected behavior after wakeup.
+ * Thus we save/restore these specified MSRs across suspend/resume
+ * in order to work around it.
+ *
+ * For any further problematic BIOSen/platforms,
+ * please add your own function similar to msr_initialize_bdw.
+ */
+static int msr_initialize_bdw(const struct dmi_system_id *d)
+{
+ /* Add any extra MSR ids into this array. */
+ u32 bdw_msr_id[] = { MSR_IA32_THERM_CONTROL };
+
+ pr_info("x86/pm: %s detected, MSR saving is needed during suspending.\n", d->ident);
+ return msr_init_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id));
+}
+
+static struct dmi_system_id msr_save_dmi_table[] = {
+ {
+ .callback = msr_initialize_bdw,
+ .ident = "BROADWELL BDX_EP",
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "GRANTLEY"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "E63448-400"),
+ },
+ },
+ {}
+};
+
+static int pm_check_save_msr(void)
+{
+ dmi_check_system(msr_save_dmi_table);
+ return 0;
+}
+
+device_initcall(pm_check_save_msr);
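The framework's comment invites future additions. A hedged sketch of what a second entry might look like — the function name, ident and DMI strings below are placeholders, not a real platform:

/* Hypothetical example; every identifier here is a placeholder. */
static int msr_initialize_foo(const struct dmi_system_id *d)
{
	u32 foo_msr_id[] = { MSR_IA32_THERM_CONTROL };

	pr_info("x86/pm: %s detected, MSR saving is needed during suspend.\n",
		d->ident);
	return msr_init_context(foo_msr_id, ARRAY_SIZE(foo_msr_id));
}

with a matching msr_save_dmi_table entry:

	{
		.callback = msr_initialize_foo,
		.ident = "EXAMPLE PLATFORM",
		.matches = {
			DMI_MATCH(DMI_PRODUCT_NAME, "EXAMPLE-BOARD"),
		},
	},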
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 06934a8a4872..14fcd01ed992 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -211,7 +211,7 @@ static int copy_sc_from_user(struct pt_regs *regs,
if (err)
return 1;
- err = convert_fxsr_from_user(&fpx, sc.fpstate);
+ err = convert_fxsr_from_user(&fpx, (void *)sc.fpstate);
if (err)
return 1;
@@ -227,7 +227,7 @@ static int copy_sc_from_user(struct pt_regs *regs,
{
struct user_i387_struct fp;
- err = copy_from_user(&fp, sc.fpstate,
+ err = copy_from_user(&fp, (void *)sc.fpstate,
sizeof(struct user_i387_struct));
if (err)
return 1;
@@ -291,7 +291,7 @@ static int copy_sc_to_user(struct sigcontext __user *to,
#endif
#undef PUTREG
sc.oldmask = mask;
- sc.fpstate = to_fp;
+ sc.fpstate = (unsigned long)to_fp;
err = copy_to_user(to, &sc, sizeof(struct sigcontext));
if (err)
@@ -468,12 +468,10 @@ long sys_sigreturn(void)
struct sigframe __user *frame = (struct sigframe __user *)(sp - 8);
sigset_t set;
struct sigcontext __user *sc = &frame->sc;
- unsigned long __user *oldmask = &sc->oldmask;
- unsigned long __user *extramask = frame->extramask;
int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long);
- if (copy_from_user(&set.sig[0], oldmask, sizeof(set.sig[0])) ||
- copy_from_user(&set.sig[1], extramask, sig_size))
+ if (copy_from_user(&set.sig[0], &sc->oldmask, sizeof(set.sig[0])) ||
+ copy_from_user(&set.sig[1], frame->extramask, sig_size))
goto segfault;
set_current_blocked(&set);
@@ -505,6 +503,7 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
{
struct rt_sigframe __user *frame;
int err = 0, sig = ksig->sig;
+ unsigned long fp_to;
frame = (struct rt_sigframe __user *)
round_down(stack_top - sizeof(struct rt_sigframe), 16);
@@ -526,7 +525,10 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
err |= __save_altstack(&frame->uc.uc_stack, PT_REGS_SP(regs));
err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
set->sig[0]);
- err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
+
+ fp_to = (unsigned long)&frame->fpstate;
+
+ err |= __put_user(fp_to, &frame->uc.uc_mcontext.fpstate);
if (sizeof(*set) == 16) {
err |= __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
err |= __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5774800ff583..23063923e364 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1192,7 +1192,7 @@ static const struct pv_info xen_info __initconst = {
#ifdef CONFIG_X86_64
.extra_user_64bit_cs = FLAT_USER_CS64,
#endif
-
+ .features = 0,
.name = "Xen",
};
@@ -1229,10 +1229,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.iret = xen_iret,
#ifdef CONFIG_X86_64
- .usergs_sysret32 = xen_sysret32,
.usergs_sysret64 = xen_sysret64,
-#else
- .irq_enable_sysexit = xen_sysexit,
#endif
.load_tr_desc = paravirt_nop,
@@ -1265,12 +1262,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.end_context_switch = xen_end_context_switch,
};
-static const struct pv_apic_ops xen_apic_ops __initconst = {
-#ifdef CONFIG_X86_LOCAL_APIC
- .startup_ipi_hook = paravirt_nop,
-#endif
-};
-
static void xen_reboot(int reason)
{
struct sched_shutdown r = { .reason = reason };
@@ -1535,8 +1526,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* Install Xen paravirt ops */
pv_info = xen_info;
+ if (xen_initial_domain())
+ pv_info.features |= PV_SUPPORTED_RTC;
pv_init_ops = xen_init_ops;
- pv_apic_ops = xen_apic_ops;
if (!xen_pvh_domain()) {
pv_cpu_ops = xen_cpu_ops;
@@ -1886,8 +1878,10 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
static void xen_set_cpu_features(struct cpuinfo_x86 *c)
{
- if (xen_pv_domain())
+ if (xen_pv_domain()) {
clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+ set_cpu_cap(c, X86_FEATURE_XENPV);
+ }
}
const struct hypervisor_x86 x86_hyper_xen = {
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index ac161db63388..c913ca4f6958 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2436,7 +2436,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.flush_tlb_others = xen_flush_tlb_others,
.pte_update = paravirt_nop,
- .pte_update_defer = paravirt_nop,
.pgd_alloc = xen_pgd_alloc,
.pgd_free = xen_pgd_free,
@@ -2495,14 +2494,9 @@ void __init xen_init_mmu_ops(void)
{
x86_init.paging.pagetable_init = xen_pagetable_init;
- /* Optimization - we can use the HVM one but it has no idea which
- * VCPUs are descheduled - which means that it will needlessly IPI
- * them. Xen knows so let it do the job.
- */
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
- pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
+ if (xen_feature(XENFEAT_auto_translated_physmap))
return;
- }
+
pv_mmu_ops = xen_mmu_ops;
memset(dummy_mapping, 0xff, PAGE_SIZE);
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index feddabdab448..df0c40559583 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -1,6 +1,7 @@
#include <linux/types.h>
#include <linux/tick.h>
+#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/grant_table.h>
#include <xen/events.h>
@@ -68,26 +69,16 @@ static void xen_pv_post_suspend(int suspend_cancelled)
void xen_arch_pre_suspend(void)
{
- int cpu;
-
- for_each_online_cpu(cpu)
- xen_pmu_finish(cpu);
-
if (xen_pv_domain())
xen_pv_pre_suspend();
}
void xen_arch_post_suspend(int cancelled)
{
- int cpu;
-
if (xen_pv_domain())
xen_pv_post_suspend(cancelled);
else
xen_hvm_post_suspend(cancelled);
-
- for_each_online_cpu(cpu)
- xen_pmu_init(cpu);
}
static void xen_vcpu_notify_restore(void *data)
@@ -106,10 +97,20 @@ static void xen_vcpu_notify_suspend(void *data)
void xen_arch_resume(void)
{
+ int cpu;
+
on_each_cpu(xen_vcpu_notify_restore, NULL, 1);
+
+ for_each_online_cpu(cpu)
+ xen_pmu_init(cpu);
}
void xen_arch_suspend(void)
{
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ xen_pmu_finish(cpu);
+
on_each_cpu(xen_vcpu_notify_suspend, NULL, 1);
}
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index fd92a64d748e..feb6d40a0860 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -35,20 +35,6 @@ check_events:
ret
/*
- * We can't use sysexit directly, because we're not running in ring0.
- * But we can easily fake it up using iret. Assuming xen_sysexit is
- * jumped to with a standard stack frame, we can just strip it back to
- * a standard iret frame and use iret.
- */
-ENTRY(xen_sysexit)
- movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
- orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
- lea PT_EIP(%esp), %esp
-
- jmp xen_iret
-ENDPROC(xen_sysexit)
-
-/*
* This is run where a normal iret would be run, with the same stack setup:
* 8: eflags
* 4: cs
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index f22667abf7b9..cc8acc410ddb 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -68,25 +68,6 @@ ENTRY(xen_sysret64)
ENDPATCH(xen_sysret64)
RELOC(xen_sysret64, 1b+1)
-ENTRY(xen_sysret32)
- /*
- * We're already on the usermode stack at this point, but
- * still with the kernel gs, so we can easily switch back
- */
- movq %rsp, PER_CPU_VAR(rsp_scratch)
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-
- pushq $__USER32_DS
- pushq PER_CPU_VAR(rsp_scratch)
- pushq %r11
- pushq $__USER32_CS
- pushq %rcx
-
- pushq $0
-1: jmp hypercall_iret
-ENDPATCH(xen_sysret32)
-RELOC(xen_sysret32, 1b+1)
-
/*
* Xen handles syscall callbacks much like ordinary exceptions, which
* means we have:
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 1399423f3418..4140b070f2e9 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -139,9 +139,6 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long);
/* These are not functions, and cannot be called normally */
__visible void xen_iret(void);
-#ifdef CONFIG_X86_32
-__visible void xen_sysexit(void);
-#endif
__visible void xen_sysret32(void);
__visible void xen_sysret64(void);
__visible void xen_adjust_exception_frame(void);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 5bcdfc10c23a..5a37188b559f 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1127,15 +1127,15 @@ void blkcg_exit_queue(struct request_queue *q)
* of the main cic data structures. For now we allow a task to change
* its cgroup only if it's the only owner of its ioc.
*/
-static int blkcg_can_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static int blkcg_can_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
+ struct cgroup_subsys_state *dst_css;
struct io_context *ioc;
int ret = 0;
/* task_lock() is needed to avoid races with exit_io_context() */
- cgroup_taskset_for_each(task, tset) {
+ cgroup_taskset_for_each(task, dst_css, tset) {
task_lock(task);
ioc = task->io_context;
if (ioc && atomic_read(&ioc->nr_tasks) > 1)
diff --git a/block/blk-core.c b/block/blk-core.c
index 5131993b23a1..33e2f62d5062 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -207,6 +207,22 @@ void blk_delay_queue(struct request_queue *q, unsigned long msecs)
EXPORT_SYMBOL(blk_delay_queue);
/**
+ * blk_start_queue_async - asynchronously restart a previously stopped queue
+ * @q: The &struct request_queue in question
+ *
+ * Description:
+ * blk_start_queue_async() will clear the stop flag on the queue, and
+ * ensure that the request_fn for the queue is run from an async
+ * context.
+ **/
+void blk_start_queue_async(struct request_queue *q)
+{
+ queue_flag_clear(QUEUE_FLAG_STOPPED, q);
+ blk_run_queue_async(q);
+}
+EXPORT_SYMBOL(blk_start_queue_async);
+
+/**
* blk_start_queue - restart a previously stopped queue
* @q: The &struct request_queue in question
*
@@ -1689,8 +1705,6 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
struct request *req;
unsigned int request_count = 0;
- blk_queue_split(q, &bio, q->bio_split);
-
/*
* low level driver can indicate that it wants pages above a
* certain limit bounced to low memory (ie for highmem, or even
@@ -1698,6 +1712,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
*/
blk_queue_bounce(q, &bio);
+ blk_queue_split(q, &bio, q->bio_split);
+
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
bio->bi_error = -EIO;
bio_endio(bio);
@@ -2114,7 +2130,8 @@ blk_qc_t submit_bio(int rw, struct bio *bio)
EXPORT_SYMBOL(submit_bio);
/**
- * blk_rq_check_limits - Helper function to check a request for the queue limit
+ * blk_cloned_rq_check_limits - Helper function to check a cloned request
+ * for the new queue limits
* @q: the queue
* @rq: the request being checked
*
@@ -2125,20 +2142,13 @@ EXPORT_SYMBOL(submit_bio);
* after it is inserted to @q, it should be checked against @q before
* the insertion using this generic function.
*
- * This function should also be useful for request stacking drivers
- * in some cases below, so export this function.
* Request stacking drivers like request-based dm may change the queue
- * limits while requests are in the queue (e.g. dm's table swapping).
- * Such request stacking drivers should check those requests against
- * the new queue limits again when they dispatch those requests,
- * although such checkings are also done against the old queue limits
- * when submitting requests.
+ * limits when retrying requests on other queues. Those requests need
+ * to be checked against the new queue limits again during dispatch.
*/
-int blk_rq_check_limits(struct request_queue *q, struct request *rq)
+static int blk_cloned_rq_check_limits(struct request_queue *q,
+ struct request *rq)
{
- if (!rq_mergeable(rq))
- return 0;
-
if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) {
printk(KERN_ERR "%s: over max size limit.\n", __func__);
return -EIO;
@@ -2158,7 +2168,6 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq)
return 0;
}
-EXPORT_SYMBOL_GPL(blk_rq_check_limits);
/**
* blk_insert_cloned_request - Helper for stacking drivers to submit a request
@@ -2170,7 +2179,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
unsigned long flags;
int where = ELEVATOR_INSERT_BACK;
- if (blk_rq_check_limits(q, rq))
+ if (blk_cloned_rq_check_limits(q, rq))
return -EIO;
if (rq->rq_disk &&
@@ -3412,6 +3421,9 @@ int blk_pre_runtime_suspend(struct request_queue *q)
{
int ret = 0;
+ if (!q->dev)
+ return ret;
+
spin_lock_irq(q->queue_lock);
if (q->nr_pending) {
ret = -EBUSY;
@@ -3439,6 +3451,9 @@ EXPORT_SYMBOL(blk_pre_runtime_suspend);
*/
void blk_post_runtime_suspend(struct request_queue *q, int err)
{
+ if (!q->dev)
+ return;
+
spin_lock_irq(q->queue_lock);
if (!err) {
q->rpm_status = RPM_SUSPENDED;
@@ -3463,6 +3478,9 @@ EXPORT_SYMBOL(blk_post_runtime_suspend);
*/
void blk_pre_runtime_resume(struct request_queue *q)
{
+ if (!q->dev)
+ return;
+
spin_lock_irq(q->queue_lock);
q->rpm_status = RPM_RESUMING;
spin_unlock_irq(q->queue_lock);
@@ -3485,6 +3503,9 @@ EXPORT_SYMBOL(blk_pre_runtime_resume);
*/
void blk_post_runtime_resume(struct request_queue *q, int err)
{
+ if (!q->dev)
+ return;
+
spin_lock_irq(q->queue_lock);
if (!err) {
q->rpm_status = RPM_ACTIVE;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index de5716d8e525..e01405a3e8b3 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -76,6 +76,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
struct bio_vec bv, bvprv, *bvprvp = NULL;
struct bvec_iter iter;
unsigned seg_size = 0, nsegs = 0, sectors = 0;
+ unsigned front_seg_size = bio->bi_seg_front_size;
+ bool do_split = true;
+ struct bio *new = NULL;
bio_for_each_segment(bv, bio, iter) {
if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q))
@@ -98,8 +101,11 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
seg_size += bv.bv_len;
bvprv = bv;
- bvprvp = &bv;
+ bvprvp = &bvprv;
sectors += bv.bv_len >> 9;
+
+ if (nsegs == 1 && seg_size > front_seg_size)
+ front_seg_size = seg_size;
continue;
}
new_segment:
@@ -108,16 +114,29 @@ new_segment:
nsegs++;
bvprv = bv;
- bvprvp = &bv;
+ bvprvp = &bvprv;
seg_size = bv.bv_len;
sectors += bv.bv_len >> 9;
+
+ if (nsegs == 1 && seg_size > front_seg_size)
+ front_seg_size = seg_size;
}
- *segs = nsegs;
- return NULL;
+ do_split = false;
split:
*segs = nsegs;
- return bio_split(bio, sectors, GFP_NOIO, bs);
+
+ if (do_split) {
+ new = bio_split(bio, sectors, GFP_NOIO, bs);
+ if (new)
+ bio = new;
+ }
+
+ bio->bi_seg_front_size = front_seg_size;
+ if (seg_size > bio->bi_seg_back_size)
+ bio->bi_seg_back_size = seg_size;
+
+ return do_split ? new : NULL;
}
void blk_queue_split(struct request_queue *q, struct bio **bio,
@@ -412,6 +431,12 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
if (sg)
sg_mark_end(sg);
+ /*
+ * Something must have gone wrong if the computed number of
+ * segments is bigger than the number of the request's physical segments
+ */
+ WARN_ON(nsegs > rq->nr_phys_segments);
+
return nsegs;
}
EXPORT_SYMBOL(blk_rq_map_sg);
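The bvprvp change in blk_bio_segment_split() fixes a pointer-aliasing slip: keeping a pointer to the loop variable bv means the "previous vector" pointer silently tracks the current element once the loop advances. The same bug in miniature:

#include <stdio.h>

int main(void)
{
	int cur, prev_copy, *prevp = NULL;
	int i;

	for (i = 1; i <= 3; i++) {
		cur = i;
		if (prevp)	/* with prevp = &cur this prints prev == cur */
			printf("prev=%d cur=%d\n", *prevp, cur);
		prev_copy = cur;
		prevp = &prev_copy;	/* the fix: point at the saved copy */
	}
	return 0;
}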
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3ae09de62f19..6d6f8feb48c0 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1291,15 +1291,16 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_mq_bio_to_request(rq, bio);
/*
- * we do limited pluging. If bio can be merged, do merge.
+ * We do limited plugging. If the bio can be merged, do that.
* Otherwise the existing request in the plug list will be
* issued. So the plug list will have one request at most
*/
if (plug) {
/*
* The plug list might get flushed before this. If that
- * happens, same_queue_rq is invalid and plug list is empty
- **/
+ * happens, same_queue_rq is invalid and plug list is
+ * empty
+ */
if (same_queue_rq && !list_empty(&plug->mq_list)) {
old_rq = same_queue_rq;
list_del_init(&old_rq->queuelist);
@@ -1380,12 +1381,15 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
blk_mq_bio_to_request(rq, bio);
if (!request_count)
trace_block_plug(q);
- else if (request_count >= BLK_MAX_REQUEST_COUNT) {
+
+ blk_mq_put_ctx(data.ctx);
+
+ if (request_count >= BLK_MAX_REQUEST_COUNT) {
blk_flush_plug_list(plug, false);
trace_block_plug(q);
}
+
list_add_tail(&rq->queuelist, &plug->mq_list);
- blk_mq_put_ctx(data.ctx);
return cookie;
}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 7d8f129a1516..dd4973583978 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -91,7 +91,8 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
lim->virt_boundary_mask = 0;
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
- lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+ lim->max_sectors = lim->max_dev_sectors = lim->max_hw_sectors =
+ BLK_SAFE_MAX_SECTORS;
lim->chunk_sectors = 0;
lim->max_write_same_sectors = 0;
lim->max_discard_sectors = 0;
@@ -127,6 +128,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_hw_sectors = UINT_MAX;
lim->max_segment_size = UINT_MAX;
lim->max_sectors = UINT_MAX;
+ lim->max_dev_sectors = UINT_MAX;
lim->max_write_same_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
@@ -214,8 +216,8 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 max_addr)
EXPORT_SYMBOL(blk_queue_bounce_limit);
/**
- * blk_limits_max_hw_sectors - set hard and soft limit of max sectors for request
- * @limits: the queue limits
+ * blk_queue_max_hw_sectors - set max sectors for a request for this queue
+ * @q: the request queue for the device
* @max_hw_sectors: max hardware sectors in the usual 512b unit
*
* Description:
@@ -224,13 +226,19 @@ EXPORT_SYMBOL(blk_queue_bounce_limit);
* the device driver based upon the capabilities of the I/O
* controller.
*
+ * max_dev_sectors is a hard limit imposed by the storage device for
+ * READ/WRITE requests. It is set by the disk driver.
+ *
* max_sectors is a soft limit imposed by the block layer for
* filesystem type requests. This value can be overridden on a
* per-device basis in /sys/block/<device>/queue/max_sectors_kb.
* The soft limit can not exceed max_hw_sectors.
**/
-void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_sectors)
+void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
{
+ struct queue_limits *limits = &q->limits;
+ unsigned int max_sectors;
+
if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) {
max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
printk(KERN_INFO "%s: set to minimum %d\n",
@@ -238,22 +246,9 @@ void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_
}
limits->max_hw_sectors = max_hw_sectors;
- limits->max_sectors = min_t(unsigned int, max_hw_sectors,
- BLK_DEF_MAX_SECTORS);
-}
-EXPORT_SYMBOL(blk_limits_max_hw_sectors);
-
-/**
- * blk_queue_max_hw_sectors - set max sectors for a request for this queue
- * @q: the request queue for the device
- * @max_hw_sectors: max hardware sectors in the usual 512b unit
- *
- * Description:
- * See description for blk_limits_max_hw_sectors().
- **/
-void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
-{
- blk_limits_max_hw_sectors(&q->limits, max_hw_sectors);
+ max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors);
+ max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS);
+ limits->max_sectors = max_sectors;
}
EXPORT_SYMBOL(blk_queue_max_hw_sectors);
@@ -527,6 +522,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
+ t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
t->max_write_same_sectors = min(t->max_write_same_sectors,
b->max_write_same_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
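With max_dev_sectors in the mix, blk_queue_max_hw_sectors() now derives the soft limit as min_not_zero(max_hw_sectors, max_dev_sectors), capped at BLK_DEF_MAX_SECTORS; min_not_zero keeps an unset (zero) device limit from clamping everything to zero. A worked fragment — the sample numbers are illustrative and BLK_DEF_MAX_SECTORS is assumed to be 2560 here:

unsigned int max_hw_sectors  = 65535;	/* controller capability */
unsigned int max_dev_sectors = 32768;	/* disk-reported hard limit */
unsigned int max_sectors;

max_sectors = min_not_zero(max_hw_sectors, max_dev_sectors);	/* 32768 */
max_sectors = min_t(unsigned int, max_sectors, 2560);		/* soft cap */
/* max_sectors == 2560, still tunable via /sys/block/<dev>/queue/max_sectors_kb */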
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 565b8dac5782..e140cc487ce1 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -205,6 +205,9 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
if (ret < 0)
return ret;
+ max_hw_sectors_kb = min_not_zero(max_hw_sectors_kb, (unsigned long)
+ q->limits.max_dev_sectors >> 1);
+
if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
return -EINVAL;
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 246dfb16c3d9..aa40aa93381b 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -158,11 +158,13 @@ void blk_abort_request(struct request *req)
{
if (blk_mark_rq_complete(req))
return;
- blk_delete_timer(req);
- if (req->q->mq_ops)
+
+ if (req->q->mq_ops) {
blk_mq_rq_timed_out(req, false);
- else
+ } else {
+ blk_delete_timer(req);
blk_rq_timed_out(req);
+ }
}
EXPORT_SYMBOL_GPL(blk_abort_request);
diff --git a/block/blk.h b/block/blk.h
index da722eb786df..c43926d3d74d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -72,8 +72,6 @@ void blk_dequeue_request(struct request *rq);
void __blk_queue_free_tags(struct request_queue *q);
bool __blk_end_bidi_request(struct request *rq, int error,
unsigned int nr_bytes, unsigned int bidi_bytes);
-int blk_queue_enter(struct request_queue *q, gfp_t gfp);
-void blk_queue_exit(struct request_queue *q);
void blk_freeze_queue(struct request_queue *q);
static inline void blk_queue_enter_live(struct request_queue *q)
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index 3de89d4690f3..a163c487cf38 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -21,10 +21,10 @@ static void noop_merged_requests(struct request_queue *q, struct request *rq,
static int noop_dispatch(struct request_queue *q, int force)
{
struct noop_data *nd = q->elevator->elevator_data;
+ struct request *rq;
- if (!list_empty(&nd->queue)) {
- struct request *rq;
- rq = list_entry(nd->queue.next, struct request, queuelist);
+ rq = list_first_entry_or_null(&nd->queue, struct request, queuelist);
+ if (rq) {
list_del_init(&rq->queuelist);
elv_dispatch_sort(q, rq);
return 1;
@@ -46,7 +46,7 @@ noop_former_request(struct request_queue *q, struct request *rq)
if (rq->queuelist.prev == &nd->queue)
return NULL;
- return list_entry(rq->queuelist.prev, struct request, queuelist);
+ return list_prev_entry(rq, queuelist);
}
static struct request *
@@ -56,7 +56,7 @@ noop_latter_request(struct request_queue *q, struct request *rq)
if (rq->queuelist.next == &nd->queue)
return NULL;
- return list_entry(rq->queuelist.next, struct request, queuelist);
+ return list_next_entry(rq, queuelist);
}
static int noop_init_queue(struct request_queue *q, struct elevator_type *e)
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 3b030157ec85..746935a5973c 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -397,7 +397,7 @@ static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
struct hd_struct *part;
int res;
- if (bdev->bd_part_count)
+ if (bdev->bd_part_count || bdev->bd_super)
return -EBUSY;
res = invalidate_partition(disk, 0);
if (res)
diff --git a/block/partitions/mac.c b/block/partitions/mac.c
index c2c48ec64b27..621317ac4d59 100644
--- a/block/partitions/mac.c
+++ b/block/partitions/mac.c
@@ -32,7 +32,7 @@ int mac_partition(struct parsed_partitions *state)
Sector sect;
unsigned char *data;
int slot, blocks_in_map;
- unsigned secsize;
+ unsigned secsize, datasize, partoffset;
#ifdef CONFIG_PPC_PMAC
int found_root = 0;
int found_root_goodness = 0;
@@ -50,10 +50,14 @@ int mac_partition(struct parsed_partitions *state)
}
secsize = be16_to_cpu(md->block_size);
put_dev_sector(sect);
- data = read_part_sector(state, secsize/512, &sect);
+ datasize = round_down(secsize, 512);
+ data = read_part_sector(state, datasize / 512, &sect);
if (!data)
return -1;
- part = (struct mac_partition *) (data + secsize%512);
+ partoffset = secsize % 512;
+ if (partoffset + sizeof(*part) > datasize)
+ return -1;
+ part = (struct mac_partition *) (data + partoffset);
if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC) {
put_dev_sector(sect);
return 0; /* not a MacOS disk */
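The round_down() and partoffset check close an out-of-bounds read on crafted block_size values: read_part_sector() only guarantees one sector's worth of data, so an oddball secsize could previously leave the mac_partition struct straddling the end of the mapped data. Worked through:

/* secsize = 2048 (sane):    datasize = 2048, read sector 4, partoffset = 0
 * secsize = 1000 (crafted): datasize =  512, read sector 1, partoffset = 488;
 *                           488 + sizeof(struct mac_partition) > 512, so the
 *                           parser now bails out instead of reading past the
 *                           end of the sector data. */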
diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c
index b4ffc5be1a93..e5b5721809e2 100644
--- a/crypto/ablkcipher.c
+++ b/crypto/ablkcipher.c
@@ -277,12 +277,12 @@ static int ablkcipher_walk_first(struct ablkcipher_request *req,
if (WARN_ON_ONCE(in_irq()))
return -EDEADLK;
+ walk->iv = req->info;
walk->nbytes = walk->total;
if (unlikely(!walk->total))
return 0;
walk->iv_buffer = NULL;
- walk->iv = req->info;
if (unlikely(((unsigned long)walk->iv & alignmask))) {
int err = ablkcipher_copy_iv(walk, tfm, alignmask);
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 0aa6fdfb448a..6d4d4569447e 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -125,7 +125,7 @@ static int aead_wait_for_data(struct sock *sk, unsigned flags)
if (flags & MSG_DONTWAIT)
return -EAGAIN;
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
for (;;) {
if (signal_pending(current))
@@ -139,7 +139,7 @@ static int aead_wait_for_data(struct sock *sk, unsigned flags)
}
finish_wait(sk_sleep(sk), &wait);
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
return err;
}
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index af31a0ee4057..634b4d1ab681 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -47,7 +47,7 @@ struct skcipher_ctx {
bool merge;
bool enc;
- struct ablkcipher_request req;
+ struct skcipher_request req;
};
struct skcipher_async_rsgl {
@@ -64,13 +64,13 @@ struct skcipher_async_req {
};
#define GET_SREQ(areq, ctx) (struct skcipher_async_req *)((char *)areq + \
- crypto_ablkcipher_reqsize(crypto_ablkcipher_reqtfm(&ctx->req)))
+ crypto_skcipher_reqsize(crypto_skcipher_reqtfm(&ctx->req)))
#define GET_REQ_SIZE(ctx) \
- crypto_ablkcipher_reqsize(crypto_ablkcipher_reqtfm(&ctx->req))
+ crypto_skcipher_reqsize(crypto_skcipher_reqtfm(&ctx->req))
#define GET_IV_SIZE(ctx) \
- crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(&ctx->req))
+ crypto_skcipher_ivsize(crypto_skcipher_reqtfm(&ctx->req))
#define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \
sizeof(struct scatterlist) - 1)
@@ -212,7 +212,7 @@ static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags)
if (flags & MSG_DONTWAIT)
return -EAGAIN;
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
for (;;) {
if (signal_pending(current))
@@ -258,7 +258,7 @@ static int skcipher_wait_for_data(struct sock *sk, unsigned flags)
return -EAGAIN;
}
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
for (;;) {
if (signal_pending(current))
@@ -272,7 +272,7 @@ static int skcipher_wait_for_data(struct sock *sk, unsigned flags)
}
finish_wait(sk_sleep(sk), &wait);
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
return err;
}
@@ -302,8 +302,8 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct skcipher_ctx *ctx = ask->private;
- struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx->req);
- unsigned ivsize = crypto_ablkcipher_ivsize(tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(&ctx->req);
+ unsigned ivsize = crypto_skcipher_ivsize(tfm);
struct skcipher_sg_list *sgl;
struct af_alg_control con = {};
long copied = 0;
@@ -507,7 +507,7 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
struct skcipher_sg_list *sgl;
struct scatterlist *sg;
struct skcipher_async_req *sreq;
- struct ablkcipher_request *req;
+ struct skcipher_request *req;
struct skcipher_async_rsgl *last_rsgl = NULL;
unsigned int txbufs = 0, len = 0, tx_nents = skcipher_all_sg_nents(ctx);
unsigned int reqlen = sizeof(struct skcipher_async_req) +
@@ -531,9 +531,9 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
}
sg_init_table(sreq->tsg, tx_nents);
memcpy(sreq->iv, ctx->iv, GET_IV_SIZE(ctx));
- ablkcipher_request_set_tfm(req, crypto_ablkcipher_reqtfm(&ctx->req));
- ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- skcipher_async_cb, sk);
+ skcipher_request_set_tfm(req, crypto_skcipher_reqtfm(&ctx->req));
+ skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ skcipher_async_cb, sk);
while (iov_iter_count(&msg->msg_iter)) {
struct skcipher_async_rsgl *rsgl;
@@ -608,10 +608,10 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
if (mark)
sg_mark_end(sreq->tsg + txbufs - 1);
- ablkcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg,
- len, sreq->iv);
- err = ctx->enc ? crypto_ablkcipher_encrypt(req) :
- crypto_ablkcipher_decrypt(req);
+ skcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg,
+ len, sreq->iv);
+ err = ctx->enc ? crypto_skcipher_encrypt(req) :
+ crypto_skcipher_decrypt(req);
if (err == -EINPROGRESS) {
atomic_inc(&ctx->inflight);
err = -EIOCBQUEUED;
@@ -632,7 +632,7 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct skcipher_ctx *ctx = ask->private;
- unsigned bs = crypto_ablkcipher_blocksize(crypto_ablkcipher_reqtfm(
+ unsigned bs = crypto_skcipher_blocksize(crypto_skcipher_reqtfm(
&ctx->req));
struct skcipher_sg_list *sgl;
struct scatterlist *sg;
@@ -669,14 +669,13 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
if (!used)
goto free;
- ablkcipher_request_set_crypt(&ctx->req, sg,
- ctx->rsgl.sg, used,
- ctx->iv);
+ skcipher_request_set_crypt(&ctx->req, sg, ctx->rsgl.sg, used,
+ ctx->iv);
err = af_alg_wait_for_completion(
ctx->enc ?
- crypto_ablkcipher_encrypt(&ctx->req) :
- crypto_ablkcipher_decrypt(&ctx->req),
+ crypto_skcipher_encrypt(&ctx->req) :
+ crypto_skcipher_decrypt(&ctx->req),
&ctx->completion);
free:
@@ -751,17 +750,17 @@ static struct proto_ops algif_skcipher_ops = {
static void *skcipher_bind(const char *name, u32 type, u32 mask)
{
- return crypto_alloc_ablkcipher(name, type, mask);
+ return crypto_alloc_skcipher(name, type, mask);
}
static void skcipher_release(void *private)
{
- crypto_free_ablkcipher(private);
+ crypto_free_skcipher(private);
}
static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
{
- return crypto_ablkcipher_setkey(private, key, keylen);
+ return crypto_skcipher_setkey(private, key, keylen);
}
static void skcipher_wait(struct sock *sk)
@@ -778,13 +777,13 @@ static void skcipher_sock_destruct(struct sock *sk)
{
struct alg_sock *ask = alg_sk(sk);
struct skcipher_ctx *ctx = ask->private;
- struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx->req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(&ctx->req);
if (atomic_read(&ctx->inflight))
skcipher_wait(sk);
skcipher_free_sgl(sk);
- sock_kzfree_s(sk, ctx->iv, crypto_ablkcipher_ivsize(tfm));
+ sock_kzfree_s(sk, ctx->iv, crypto_skcipher_ivsize(tfm));
sock_kfree_s(sk, ctx, ctx->len);
af_alg_release_parent(sk);
}
@@ -793,20 +792,20 @@ static int skcipher_accept_parent(void *private, struct sock *sk)
{
struct skcipher_ctx *ctx;
struct alg_sock *ask = alg_sk(sk);
- unsigned int len = sizeof(*ctx) + crypto_ablkcipher_reqsize(private);
+ unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(private);
ctx = sock_kmalloc(sk, len, GFP_KERNEL);
if (!ctx)
return -ENOMEM;
- ctx->iv = sock_kmalloc(sk, crypto_ablkcipher_ivsize(private),
+ ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(private),
GFP_KERNEL);
if (!ctx->iv) {
sock_kfree_s(sk, ctx, len);
return -ENOMEM;
}
- memset(ctx->iv, 0, crypto_ablkcipher_ivsize(private));
+ memset(ctx->iv, 0, crypto_skcipher_ivsize(private));
INIT_LIST_HEAD(&ctx->tsgl);
ctx->len = len;
@@ -819,9 +818,9 @@ static int skcipher_accept_parent(void *private, struct sock *sk)
ask->private = ctx;
- ablkcipher_request_set_tfm(&ctx->req, private);
- ablkcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- af_alg_complete, &ctx->completion);
+ skcipher_request_set_tfm(&ctx->req, private);
+ skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ af_alg_complete, &ctx->completion);
sk->sk_destruct = skcipher_sock_destruct;
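The conversion swaps the legacy ablkcipher request API for the skcipher interface call-for-call. For orientation, a hedged sketch of the request lifecycle the new calls imply — error handling and the completion wait are elided, and skcipher_request_alloc()/skcipher_request_free() are assumed to be available alongside the setters used above:

/* Sketch of the skcipher request lifecycle; not taken from the patch. */
struct crypto_skcipher *tfm = crypto_alloc_skcipher("cbc(aes)", 0, 0);
struct skcipher_request *req = skcipher_request_alloc(tfm, GFP_KERNEL);

crypto_skcipher_setkey(tfm, key, keylen);
skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
			      my_complete_cb, my_ctx);
skcipher_request_set_crypt(req, src_sg, dst_sg, nbytes, iv);
err = crypto_skcipher_encrypt(req);	/* may return -EINPROGRESS */

skcipher_request_free(req);
crypto_free_skcipher(tfm);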
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index f8c0b8dbeb75..88bc8e6b2a54 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -53,7 +53,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
struct dmaengine_unmap_data *unmap = NULL;
if (device)
- unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO);
+ unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
unsigned long dma_prep_flags = 0;
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index 5d355e0c2633..c0748bbd4c08 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -188,7 +188,7 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
if (device)
- unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
+ unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOWAIT);
/* XORing P/Q is only implemented in software */
if (unmap && !(submit->flags & ASYNC_TX_PQ_XOR_DST) &&
@@ -307,7 +307,7 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
BUG_ON(disks < 4);
if (device)
- unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
+ unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOWAIT);
if (unmap && disks <= dma_maxpq(device, 0) &&
is_dma_pq_aligned(device, offset, 0, len)) {
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
index 934a84981495..8fab6275ea1f 100644
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -41,7 +41,7 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
u8 *a, *b, *c;
if (dma)
- unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+ unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOWAIT);
if (unmap) {
struct device *dev = dma->dev;
@@ -105,7 +105,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
u8 *d, *s;
if (dma)
- unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+ unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOWAIT);
if (unmap) {
dma_addr_t dma_dest[2];
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index e1bce26cd4f9..da75777f2b3f 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -182,7 +182,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
BUG_ON(src_cnt <= 1);
if (device)
- unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOIO);
+ unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOWAIT);
if (unmap && is_dma_xor_aligned(device, offset, 0, len)) {
struct dma_async_tx_descriptor *tx;
@@ -278,7 +278,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
BUG_ON(src_cnt <= 1);
if (device)
- unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOIO);
+ unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOWAIT);
if (unmap && src_cnt <= device->max_xor &&
is_dma_xor_aligned(device, offset, 0, len)) {
diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index 11b981492031..8cc1622b2ee0 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -326,12 +326,12 @@ static int blkcipher_walk_first(struct blkcipher_desc *desc,
if (WARN_ON_ONCE(in_irq()))
return -EDEADLK;
+ walk->iv = desc->info;
walk->nbytes = walk->total;
if (unlikely(!walk->total))
return 0;
walk->buffer = NULL;
- walk->iv = desc->info;
if (unlikely(((unsigned long)walk->iv & walk->alignmask))) {
int err = blkcipher_copy_iv(walk);
if (err)
diff --git a/drivers/Makefile b/drivers/Makefile
index 73d039156ea7..795d0ca714bf 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_FB_I810) += video/fbdev/i810/
obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
obj-$(CONFIG_PARPORT) += parport/
+obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/
obj-$(CONFIG_LIBNVDIMM) += nvdimm/
obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
@@ -70,7 +71,6 @@ obj-$(CONFIG_NUBUS) += nubus/
obj-y += macintosh/
obj-$(CONFIG_IDE) += ide/
obj-$(CONFIG_SCSI) += scsi/
-obj-$(CONFIG_NVM) += lightnvm/
obj-y += nvme/
obj-$(CONFIG_ATA) += ata/
obj-$(CONFIG_TARGET_CORE) += target/
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 25dbb76c02cc..5eef4cb4f70e 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -58,10 +58,10 @@ config ACPI_CCA_REQUIRED
bool
config ACPI_DEBUGGER
- bool "In-kernel debugger (EXPERIMENTAL)"
+ bool "AML debugger interface (EXPERIMENTAL)"
select ACPI_DEBUG
help
- Enable in-kernel debugging facilities: statistics, internal
+ Enable in-kernel debugging of AML facilities: statistics, internal
object dump, single step control method execution.
This is still under development, currently enabling this only
results in the compilation of the ACPICA debugger files.
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 3c083d2cc434..6730f965b379 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -304,7 +304,7 @@ EXPORT_SYMBOL_GPL(acpi_get_psd_map);
static int register_pcc_channel(int pcc_subspace_idx)
{
- struct acpi_pcct_subspace *cppc_ss;
+ struct acpi_pcct_hw_reduced *cppc_ss;
unsigned int len;
if (pcc_subspace_idx >= 0) {
diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c
index 707cf6213bc2..b9afb47db7ed 100644
--- a/drivers/acpi/device_sysfs.c
+++ b/drivers/acpi/device_sysfs.c
@@ -104,7 +104,7 @@ static void acpi_expose_nondev_subnodes(struct kobject *kobj,
init_completion(&dn->kobj_done);
ret = kobject_init_and_add(&dn->kobj, &acpi_data_node_ktype,
- kobj, dn->name);
+ kobj, "%s", dn->name);
if (ret)
acpi_handle_err(dn->handle, "Failed to expose (%d)\n", ret);
else
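The extra "%s" above is a format-string fix: dn->name originates outside the kernel, so passing it as the format argument would let any '%' sequence in the name be parsed as conversion specifiers. Side by side:

	/* safe: the name is data, not format */
	ret = kobject_init_and_add(&dn->kobj, &acpi_data_node_ktype,
				   kobj, "%s", dn->name);

	/* unsafe: a '%' in dn->name is interpreted by the formatter */
	ret = kobject_init_and_add(&dn->kobj, &acpi_data_node_ktype,
				   kobj, dn->name);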
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index f61a7c834540..b420fb46669d 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1103,7 +1103,7 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 *data)
}
err_exit:
- if (result && q)
+ if (result)
acpi_ec_delete_query(q);
if (data)
*data = value;
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index f7dab53b352a..aa45d4802707 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -233,11 +233,12 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_system_address *spa)
{
+ size_t length = min_t(size_t, sizeof(*spa), spa->header.length);
struct device *dev = acpi_desc->dev;
struct nfit_spa *nfit_spa;
list_for_each_entry(nfit_spa, &prev->spas, list) {
- if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) {
+ if (memcmp(nfit_spa->spa, spa, length) == 0) {
list_move_tail(&nfit_spa->list, &acpi_desc->spas);
return true;
}
@@ -259,11 +260,12 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_memory_map *memdev)
{
+ size_t length = min_t(size_t, sizeof(*memdev), memdev->header.length);
struct device *dev = acpi_desc->dev;
struct nfit_memdev *nfit_memdev;
list_for_each_entry(nfit_memdev, &prev->memdevs, list)
- if (memcmp(nfit_memdev->memdev, memdev, sizeof(*memdev)) == 0) {
+ if (memcmp(nfit_memdev->memdev, memdev, length) == 0) {
list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs);
return true;
}
@@ -284,11 +286,12 @@ static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_control_region *dcr)
{
+ size_t length = min_t(size_t, sizeof(*dcr), dcr->header.length);
struct device *dev = acpi_desc->dev;
struct nfit_dcr *nfit_dcr;
list_for_each_entry(nfit_dcr, &prev->dcrs, list)
- if (memcmp(nfit_dcr->dcr, dcr, sizeof(*dcr)) == 0) {
+ if (memcmp(nfit_dcr->dcr, dcr, length) == 0) {
list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs);
return true;
}
@@ -308,11 +311,12 @@ static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_data_region *bdw)
{
+ size_t length = min_t(size_t, sizeof(*bdw), bdw->header.length);
struct device *dev = acpi_desc->dev;
struct nfit_bdw *nfit_bdw;
list_for_each_entry(nfit_bdw, &prev->bdws, list)
- if (memcmp(nfit_bdw->bdw, bdw, sizeof(*bdw)) == 0) {
+ if (memcmp(nfit_bdw->bdw, bdw, length) == 0) {
list_move_tail(&nfit_bdw->list, &acpi_desc->bdws);
return true;
}
@@ -332,11 +336,12 @@ static bool add_idt(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_interleave *idt)
{
+ size_t length = min_t(size_t, sizeof(*idt), idt->header.length);
struct device *dev = acpi_desc->dev;
struct nfit_idt *nfit_idt;
list_for_each_entry(nfit_idt, &prev->idts, list)
- if (memcmp(nfit_idt->idt, idt, sizeof(*idt)) == 0) {
+ if (memcmp(nfit_idt->idt, idt, length) == 0) {
list_move_tail(&nfit_idt->list, &acpi_desc->idts);
return true;
}
@@ -356,11 +361,12 @@ static bool add_flush(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev,
struct acpi_nfit_flush_address *flush)
{
+ size_t length = min_t(size_t, sizeof(*flush), flush->header.length);
struct device *dev = acpi_desc->dev;
struct nfit_flush *nfit_flush;
list_for_each_entry(nfit_flush, &prev->flushes, list)
- if (memcmp(nfit_flush->flush, flush, sizeof(*flush)) == 0) {
+ if (memcmp(nfit_flush->flush, flush, length) == 0) {
list_move_tail(&nfit_flush->list, &acpi_desc->flushes);
return true;
}
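All six add_* hunks apply the same pattern, sketched once with a hypothetical sub-table pointer tbl and cached copy cached: NFIT sub-tables may be sized per a newer or older ACPI revision than the kernel's structs, so the comparison is clamped to the length both sides can legally contain:

	size_t length = min_t(size_t, sizeof(*tbl), tbl->header.length);

	if (memcmp(cached, tbl, length) == 0)
		reuse_cached_entry();	/* stand-in for list_move_tail() */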
@@ -655,7 +661,7 @@ static ssize_t revision_show(struct device *dev,
struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
- return sprintf(buf, "%d\n", acpi_desc->nfit->header.revision);
+ return sprintf(buf, "%d\n", acpi_desc->acpi_header.revision);
}
static DEVICE_ATTR_RO(revision);
@@ -1652,7 +1658,6 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
data = (u8 *) acpi_desc->nfit;
end = data + sz;
- data += sizeof(struct acpi_table_nfit);
while (!IS_ERR_OR_NULL(data))
data = add_table(acpi_desc, &prev, data, end);
@@ -1748,13 +1753,29 @@ static int acpi_nfit_add(struct acpi_device *adev)
return PTR_ERR(acpi_desc);
}
- acpi_desc->nfit = (struct acpi_table_nfit *) tbl;
+ /*
+ * Save the acpi header for later and then skip it,
+ * making nfit point to the first nfit table header.
+ */
+ acpi_desc->acpi_header = *tbl;
+ acpi_desc->nfit = (void *) tbl + sizeof(struct acpi_table_nfit);
+ sz -= sizeof(struct acpi_table_nfit);
/* Evaluate _FIT and override with that if present */
status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
if (ACPI_SUCCESS(status) && buf.length > 0) {
- acpi_desc->nfit = (struct acpi_table_nfit *)buf.pointer;
- sz = buf.length;
+ union acpi_object *obj;
+ /*
+ * Adjust for the acpi_object header of the _FIT
+ */
+ obj = buf.pointer;
+ if (obj->type == ACPI_TYPE_BUFFER) {
+ acpi_desc->nfit =
+ (struct acpi_nfit_header *)obj->buffer.pointer;
+ sz = obj->buffer.length;
+ } else
+ dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
+ __func__, (int) obj->type);
}
rc = acpi_nfit_init(acpi_desc, sz);
@@ -1777,7 +1798,8 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
{
struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
- struct acpi_table_nfit *nfit_saved;
+ struct acpi_nfit_header *nfit_saved;
+ union acpi_object *obj;
struct device *dev = &adev->dev;
acpi_status status;
int ret;
@@ -1788,7 +1810,7 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
if (!dev->driver) {
/* dev->driver may be null if we're being removed */
dev_dbg(dev, "%s: no driver found for dev\n", __func__);
- return;
+ goto out_unlock;
}
if (!acpi_desc) {
@@ -1808,12 +1830,19 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
}
nfit_saved = acpi_desc->nfit;
- acpi_desc->nfit = (struct acpi_table_nfit *)buf.pointer;
- ret = acpi_nfit_init(acpi_desc, buf.length);
- if (!ret) {
- /* Merge failed, restore old nfit, and exit */
- acpi_desc->nfit = nfit_saved;
- dev_err(dev, "failed to merge updated NFIT\n");
+ obj = buf.pointer;
+ if (obj->type == ACPI_TYPE_BUFFER) {
+ acpi_desc->nfit =
+ (struct acpi_nfit_header *)obj->buffer.pointer;
+ ret = acpi_nfit_init(acpi_desc, obj->buffer.length);
+ if (ret) {
+ /* Merge failed, restore old nfit, and exit */
+ acpi_desc->nfit = nfit_saved;
+ dev_err(dev, "failed to merge updated NFIT\n");
+ }
+ } else {
+ /* Bad _FIT, restore old nfit */
+ dev_err(dev, "Invalid _FIT\n");
}
kfree(buf.pointer);
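Both _FIT call sites now validate identically: acpi_evaluate_object() returns a union acpi_object, and only an ACPI_TYPE_BUFFER payload may be treated as NFIT data. Condensed:

	union acpi_object *obj = buf.pointer;

	if (obj->type == ACPI_TYPE_BUFFER) {
		acpi_desc->nfit = (struct acpi_nfit_header *)obj->buffer.pointer;
		ret = acpi_nfit_init(acpi_desc, obj->buffer.length);
	} else {
		dev_err(dev, "Invalid _FIT\n");	/* keep the saved table */
	}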
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h
index 2ea5c0797c8f..3d549a383659 100644
--- a/drivers/acpi/nfit.h
+++ b/drivers/acpi/nfit.h
@@ -96,7 +96,8 @@ struct nfit_mem {
struct acpi_nfit_desc {
struct nvdimm_bus_descriptor nd_desc;
- struct acpi_table_nfit *nfit;
+ struct acpi_table_header acpi_header;
+ struct acpi_nfit_header *nfit;
struct mutex spa_map_mutex;
struct mutex init_mutex;
struct list_head spa_maps;
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 850d7bf0c873..ae3fe4e64203 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -768,6 +768,13 @@ static void pci_acpi_root_add_resources(struct acpi_pci_root_info *info)
else
continue;
+ /*
+ * Some legacy x86 host bridge drivers use iomem_resource and
+ * ioport_resource as default resource pool, skip it.
+ */
+ if (res == root)
+ continue;
+
conflict = insert_resource_conflict(root, res);
if (conflict) {
dev_info(&info->bridge->dev,
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index f4e02ae93f58..11154a330f07 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -200,7 +200,8 @@ static int acpi_pss_perf_init(struct acpi_processor *pr,
goto err_remove_sysfs_thermal;
}
- sysfs_remove_link(&pr->cdev->device.kobj, "device");
+ return 0;
+
err_remove_sysfs_thermal:
sysfs_remove_link(&device->dev.kobj, "thermal_cooling");
err_thermal_unregister:
diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c
index bf034f8b7c1a..2fa8304171e0 100644
--- a/drivers/acpi/sbshc.c
+++ b/drivers/acpi/sbshc.c
@@ -14,7 +14,6 @@
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/interrupt.h>
-#include <linux/dmi.h>
#include "sbshc.h"
#define PREFIX "ACPI: "
@@ -30,6 +29,7 @@ struct acpi_smb_hc {
u8 query_bit;
smbus_alarm_callback callback;
void *context;
+ bool done;
};
static int acpi_smbus_hc_add(struct acpi_device *device);
@@ -88,8 +88,6 @@ enum acpi_smb_offset {
ACPI_SMB_ALARM_DATA = 0x26, /* 2 bytes alarm data */
};
-static bool macbook;
-
static inline int smb_hc_read(struct acpi_smb_hc *hc, u8 address, u8 *data)
{
return ec_read(hc->offset + address, data);
@@ -100,27 +98,11 @@ static inline int smb_hc_write(struct acpi_smb_hc *hc, u8 address, u8 data)
return ec_write(hc->offset + address, data);
}
-static inline int smb_check_done(struct acpi_smb_hc *hc)
-{
- union acpi_smb_status status = {.raw = 0};
- smb_hc_read(hc, ACPI_SMB_STATUS, &status.raw);
- return status.fields.done && (status.fields.status == SMBUS_OK);
-}
-
static int wait_transaction_complete(struct acpi_smb_hc *hc, int timeout)
{
- if (wait_event_timeout(hc->wait, smb_check_done(hc),
- msecs_to_jiffies(timeout)))
+ if (wait_event_timeout(hc->wait, hc->done, msecs_to_jiffies(timeout)))
return 0;
- /*
- * After the timeout happens, OS will try to check the status of SMbus.
- * If the status is what OS expected, it will be regarded as the bogus
- * timeout.
- */
- if (smb_check_done(hc))
- return 0;
- else
- return -ETIME;
+ return -ETIME;
}
static int acpi_smbus_transaction(struct acpi_smb_hc *hc, u8 protocol,
@@ -135,8 +117,7 @@ static int acpi_smbus_transaction(struct acpi_smb_hc *hc, u8 protocol,
}
mutex_lock(&hc->lock);
- if (macbook)
- udelay(5);
+ hc->done = false;
if (smb_hc_read(hc, ACPI_SMB_PROTOCOL, &temp))
goto end;
if (temp) {
@@ -235,8 +216,10 @@ static int smbus_alarm(void *context)
if (smb_hc_read(hc, ACPI_SMB_STATUS, &status.raw))
return 0;
/* Check if it is only a completion notify */
- if (status.fields.done)
+ if (status.fields.done && status.fields.status == SMBUS_OK) {
+ hc->done = true;
wake_up(&hc->wait);
+ }
if (!status.fields.alarm)
return 0;
mutex_lock(&hc->lock);
@@ -262,29 +245,12 @@ extern int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
acpi_handle handle, acpi_ec_query_func func,
void *data);
-static int macbook_dmi_match(const struct dmi_system_id *d)
-{
- pr_debug("Detected MacBook, enabling workaround\n");
- macbook = true;
- return 0;
-}
-
-static struct dmi_system_id acpi_smbus_dmi_table[] = {
- { macbook_dmi_match, "Apple MacBook", {
- DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
- DMI_MATCH(DMI_PRODUCT_NAME, "MacBook") },
- },
- { },
-};
-
static int acpi_smbus_hc_add(struct acpi_device *device)
{
int status;
unsigned long long val;
struct acpi_smb_hc *hc;
- dmi_check_system(acpi_smbus_dmi_table);
-
if (!device)
return -EINVAL;
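The net effect of the sbshc.c rework: the MacBook DMI quirk and the post-timeout status re-poll give way to an explicit handshake in which the notify side latches success before waking the waiter, so the waiter never re-reads a possibly stale status register. Both halves, condensed from the hunks above:

	/* notify/alarm side */
	if (status.fields.done && status.fields.status == SMBUS_OK) {
		hc->done = true;
		wake_up(&hc->wait);
	}

	/* waiter side */
	if (!wait_event_timeout(hc->wait, hc->done, msecs_to_jiffies(timeout)))
		return -ETIME;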
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index ff02bb4218fc..cdfbcc54821f 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -314,16 +314,6 @@ static const struct pci_device_id ahci_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, 0x1f37), board_ahci_avn }, /* Avoton RAID */
{ PCI_VDEVICE(INTEL, 0x1f3e), board_ahci_avn }, /* Avoton RAID */
{ PCI_VDEVICE(INTEL, 0x1f3f), board_ahci_avn }, /* Avoton RAID */
- { PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/
- { PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/
- { PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/
- { PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/
- { PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/
- { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/
- { PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/
- { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/
- { PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/
- { PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/
{ PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Wellsburg RAID */
{ PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Wellsburg RAID */
{ PCI_VDEVICE(INTEL, 0x8d02), board_ahci }, /* Wellsburg AHCI */
@@ -350,10 +340,22 @@ static const struct pci_device_id ahci_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, 0x9d03), board_ahci }, /* Sunrise Point-LP AHCI */
{ PCI_VDEVICE(INTEL, 0x9d05), board_ahci }, /* Sunrise Point-LP RAID */
{ PCI_VDEVICE(INTEL, 0x9d07), board_ahci }, /* Sunrise Point-LP RAID */
+ { PCI_VDEVICE(INTEL, 0xa102), board_ahci }, /* Sunrise Point-H AHCI */
{ PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H AHCI */
{ PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */
+ { PCI_VDEVICE(INTEL, 0xa106), board_ahci }, /* Sunrise Point-H RAID */
{ PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */
{ PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */
+ { PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/
+ { PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/
+ { PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/
/* JMicron 360/1/3/5/6, match class to avoid IDE function */
{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c
index 8490d37aee2a..f7a7fa81740e 100644
--- a/drivers/ata/ahci_mvebu.c
+++ b/drivers/ata/ahci_mvebu.c
@@ -62,6 +62,7 @@ static void ahci_mvebu_regret_option(struct ahci_host_priv *hpriv)
writel(0x80, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA);
}
+#ifdef CONFIG_PM_SLEEP
static int ahci_mvebu_suspend(struct platform_device *pdev, pm_message_t state)
{
return ahci_platform_suspend_host(&pdev->dev);
@@ -81,6 +82,10 @@ static int ahci_mvebu_resume(struct platform_device *pdev)
return ahci_platform_resume_host(&pdev->dev);
}
+#else
+#define ahci_mvebu_suspend NULL
+#define ahci_mvebu_resume NULL
+#endif
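The #ifdef/#else added here is the usual pattern for optional PM callbacks: compile the handlers only under CONFIG_PM_SLEEP and substitute NULL otherwise, keeping the driver struct valid without "defined but not used" warnings. Roughly, assuming this driver's platform_driver definition elsewhere in the file:

	static struct platform_driver ahci_mvebu_driver = {
		.probe   = ahci_mvebu_probe,
		.suspend = ahci_mvebu_suspend,	/* NULL if !CONFIG_PM_SLEEP */
		.resume  = ahci_mvebu_resume,
	};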
static const struct ata_port_info ahci_mvebu_port_info = {
.flags = AHCI_FLAG_COMMON,
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 096064cd6c52..4665512dae44 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -1273,6 +1273,15 @@ static int ahci_exec_polled_cmd(struct ata_port *ap, int pmp,
ata_tf_to_fis(tf, pmp, is_cmd, fis);
ahci_fill_cmd_slot(pp, 0, cmd_fis_len | flags | (pmp << 12));
+ /* set port value for softreset of Port Multiplier */
+ if (pp->fbs_enabled && pp->fbs_last_dev != pmp) {
+ tmp = readl(port_mmio + PORT_FBS);
+ tmp &= ~(PORT_FBS_DEV_MASK | PORT_FBS_DEC);
+ tmp |= pmp << PORT_FBS_DEV_OFFSET;
+ writel(tmp, port_mmio + PORT_FBS);
+ pp->fbs_last_dev = pmp;
+ }
+
/* issue & wait */
writel(1, port_mmio + PORT_CMD_ISSUE);
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index cb0508af1459..961acc788f44 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -1505,12 +1505,20 @@ static const char *ata_err_string(unsigned int err_mask)
unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
u8 page, void *buf, unsigned int sectors)
{
+ unsigned long ap_flags = dev->link->ap->flags;
struct ata_taskfile tf;
unsigned int err_mask;
bool dma = false;
DPRINTK("read log page - log 0x%x, page 0x%x\n", log, page);
+ /*
+ * Return error without actually issuing the command on controllers
+ * which e.g. lockup on a read log page.
+ */
+ if (ap_flags & ATA_FLAG_NO_LOG_PAGE)
+ return AC_ERR_DEV;
+
retry:
ata_tf_init(dev, &tf);
if (dev->dma_mode && ata_id_has_read_log_dma_ext(dev->id) &&
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index 5389579c5120..a723ae929783 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -45,7 +45,8 @@ enum {
SATA_FSL_MAX_PRD_DIRECT = 16, /* Direct PRDT entries */
SATA_FSL_HOST_FLAGS = (ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
- ATA_FLAG_PMP | ATA_FLAG_NCQ | ATA_FLAG_AN),
+ ATA_FLAG_PMP | ATA_FLAG_NCQ |
+ ATA_FLAG_AN | ATA_FLAG_NO_LOG_PAGE),
SATA_FSL_MAX_CMDS = SATA_FSL_QUEUE_DEPTH,
SATA_FSL_CMD_HDR_SIZE = 16, /* 4 DWORDS */
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index dea6edcbf145..29bcff086bce 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -630,6 +630,9 @@ static void sil_dev_config(struct ata_device *dev)
unsigned int n, quirks = 0;
unsigned char model_num[ATA_ID_PROD_LEN + 1];
+ /* This controller doesn't support trim */
+ dev->horkage |= ATA_HORKAGE_NOTRIM;
+
ata_id_c_string(dev->id, model_num, ATA_ID_PROD, sizeof(model_num));
for (n = 0; sil_blacklist[n].product; n++)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 2804aed3f416..25425d3f2575 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -303,6 +303,10 @@ static int memory_subsys_offline(struct device *dev)
if (mem->state == MEM_OFFLINE)
return 0;
+ /* Can't offline block with non-present sections */
+ if (mem->section_count != sections_per_block)
+ return -EINVAL;
+
return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
}
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 5df4575b5ba7..47c43386786b 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -24,13 +24,17 @@
#include <linux/msi.h>
#include <linux/slab.h>
-#define DEV_ID_SHIFT 24
+#define DEV_ID_SHIFT 21
+#define MAX_DEV_MSIS (1 << (32 - DEV_ID_SHIFT))
/*
* Internal data structure containing a (made up, but unique) devid
* and the callback to write the MSI message.
*/
struct platform_msi_priv_data {
+ struct device *dev;
+ void *host_data;
+ msi_alloc_info_t arg;
irq_write_msi_msg_t write_msg;
int devid;
};
@@ -110,39 +114,49 @@ static void platform_msi_update_chip_ops(struct msi_domain_info *info)
chip->irq_write_msi_msg = platform_msi_write_msg;
}
-static void platform_msi_free_descs(struct device *dev)
+static void platform_msi_free_descs(struct device *dev, int base, int nvec)
{
struct msi_desc *desc, *tmp;
list_for_each_entry_safe(desc, tmp, dev_to_msi_list(dev), list) {
- list_del(&desc->list);
- free_msi_entry(desc);
+ if (desc->platform.msi_index >= base &&
+ desc->platform.msi_index < (base + nvec)) {
+ list_del(&desc->list);
+ free_msi_entry(desc);
+ }
}
}
-static int platform_msi_alloc_descs(struct device *dev, int nvec,
- struct platform_msi_priv_data *data)
+static int platform_msi_alloc_descs_with_irq(struct device *dev, int virq,
+ int nvec,
+ struct platform_msi_priv_data *data)
{
- int i;
+ struct msi_desc *desc;
+ int i, base = 0;
- for (i = 0; i < nvec; i++) {
- struct msi_desc *desc;
+ if (!list_empty(dev_to_msi_list(dev))) {
+ desc = list_last_entry(dev_to_msi_list(dev),
+ struct msi_desc, list);
+ base = desc->platform.msi_index + 1;
+ }
+ for (i = 0; i < nvec; i++) {
desc = alloc_msi_entry(dev);
if (!desc)
break;
desc->platform.msi_priv_data = data;
- desc->platform.msi_index = i;
+ desc->platform.msi_index = base + i;
desc->nvec_used = 1;
+ desc->irq = virq ? virq + i : 0;
list_add_tail(&desc->list, dev_to_msi_list(dev));
}
if (i != nvec) {
/* Clean up the mess */
- platform_msi_free_descs(dev);
+ platform_msi_free_descs(dev, base, nvec);
return -ENOMEM;
}
@@ -150,6 +164,13 @@ static int platform_msi_alloc_descs(struct device *dev, int nvec,
return 0;
}
+static int platform_msi_alloc_descs(struct device *dev, int nvec,
+ struct platform_msi_priv_data *data)
+{
+ return platform_msi_alloc_descs_with_irq(dev, 0, nvec, data);
+}
+
/**
* platform_msi_create_irq_domain - Create a platform MSI interrupt domain
* @fwnode: Optional fwnode of the interrupt controller
@@ -180,56 +201,75 @@ struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode,
return domain;
}
-/**
- * platform_msi_domain_alloc_irqs - Allocate MSI interrupts for @dev
- * @dev: The device for which to allocate interrupts
- * @nvec: The number of interrupts to allocate
- * @write_msi_msg: Callback to write an interrupt message for @dev
- *
- * Returns:
- * Zero for success, or an error code in case of failure
- */
-int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
- irq_write_msi_msg_t write_msi_msg)
+static struct platform_msi_priv_data *
+platform_msi_alloc_priv_data(struct device *dev, unsigned int nvec,
+ irq_write_msi_msg_t write_msi_msg)
{
- struct platform_msi_priv_data *priv_data;
- int err;
-
+ struct platform_msi_priv_data *datap;
/*
* Limit the number of interrupts to 2048 per device. Should we
* need to bump this up, DEV_ID_SHIFT should be adjusted
* accordingly (which would impact the max number of MSI
* capable devices).
*/
- if (!dev->msi_domain || !write_msi_msg || !nvec ||
- nvec > (1 << (32 - DEV_ID_SHIFT)))
- return -EINVAL;
+ if (!dev->msi_domain || !write_msi_msg || !nvec || nvec > MAX_DEV_MSIS)
+ return ERR_PTR(-EINVAL);
if (dev->msi_domain->bus_token != DOMAIN_BUS_PLATFORM_MSI) {
dev_err(dev, "Incompatible msi_domain, giving up\n");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
/* Already had a helping of MSI? Greed... */
if (!list_empty(dev_to_msi_list(dev)))
- return -EBUSY;
+ return ERR_PTR(-EBUSY);
+
+ datap = kzalloc(sizeof(*datap), GFP_KERNEL);
+ if (!datap)
+ return ERR_PTR(-ENOMEM);
+
+ datap->devid = ida_simple_get(&platform_msi_devid_ida,
+ 0, 1 << DEV_ID_SHIFT, GFP_KERNEL);
+ if (datap->devid < 0) {
+ int err = datap->devid;
+ kfree(datap);
+ return ERR_PTR(err);
+ }
- priv_data = kzalloc(sizeof(*priv_data), GFP_KERNEL);
- if (!priv_data)
- return -ENOMEM;
+ datap->write_msg = write_msi_msg;
+ datap->dev = dev;
- priv_data->devid = ida_simple_get(&platform_msi_devid_ida,
- 0, 1 << DEV_ID_SHIFT, GFP_KERNEL);
- if (priv_data->devid < 0) {
- err = priv_data->devid;
- goto out_free_data;
- }
+ return datap;
+}
+
+static void platform_msi_free_priv_data(struct platform_msi_priv_data *data)
+{
+ ida_simple_remove(&platform_msi_devid_ida, data->devid);
+ kfree(data);
+}
+
+/**
+ * platform_msi_domain_alloc_irqs - Allocate MSI interrupts for @dev
+ * @dev: The device for which to allocate interrupts
+ * @nvec: The number of interrupts to allocate
+ * @write_msi_msg: Callback to write an interrupt message for @dev
+ *
+ * Returns:
+ * Zero for success, or an error code in case of failure
+ */
+int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
+ irq_write_msi_msg_t write_msi_msg)
+{
+ struct platform_msi_priv_data *priv_data;
+ int err;
- priv_data->write_msg = write_msi_msg;
+ priv_data = platform_msi_alloc_priv_data(dev, nvec, write_msi_msg);
+ if (IS_ERR(priv_data))
+ return PTR_ERR(priv_data);
err = platform_msi_alloc_descs(dev, nvec, priv_data);
if (err)
- goto out_free_id;
+ goto out_free_priv_data;
err = msi_domain_alloc_irqs(dev->msi_domain, dev, nvec);
if (err)
@@ -238,11 +278,9 @@ int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
return 0;
out_free_desc:
- platform_msi_free_descs(dev);
-out_free_id:
- ida_simple_remove(&platform_msi_devid_ida, priv_data->devid);
-out_free_data:
- kfree(priv_data);
+ platform_msi_free_descs(dev, 0, nvec);
+out_free_priv_data:
+ platform_msi_free_priv_data(priv_data);
return err;
}
@@ -253,18 +291,126 @@ out_free_data:
*/
void platform_msi_domain_free_irqs(struct device *dev)
{
- struct msi_desc *desc;
+ if (!list_empty(dev_to_msi_list(dev))) {
+ struct msi_desc *desc;
+
+ desc = first_msi_entry(dev);
+ platform_msi_free_priv_data(desc->platform.msi_priv_data);
+ }
+
+ msi_domain_free_irqs(dev->msi_domain, dev);
+ platform_msi_free_descs(dev, 0, MAX_DEV_MSIS);
+}
+
+/**
+ * platform_msi_get_host_data - Query the private data associated with
+ * a platform-msi domain
+ * @domain: The platform-msi domain
+ *
+ * Returns the private data provided when calling
+ * platform_msi_create_device_domain.
+ */
+void *platform_msi_get_host_data(struct irq_domain *domain)
+{
+ struct platform_msi_priv_data *data = domain->host_data;
+ return data->host_data;
+}
+
+/**
+ * platform_msi_create_device_domain - Create a platform-msi domain
+ *
+ * @dev: The device generating the MSIs
+ * @nvec: The number of MSIs that need to be allocated
+ * @write_msi_msg: Callback to write an interrupt message for @dev
+ * @ops: The hierarchy domain operations to use
+ * @host_data: Private data associated to this domain
+ *
+ * Returns an irqdomain for @nvec interrupts
+ */
+struct irq_domain *
+platform_msi_create_device_domain(struct device *dev,
+ unsigned int nvec,
+ irq_write_msi_msg_t write_msi_msg,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ struct platform_msi_priv_data *data;
+ struct irq_domain *domain;
+ int err;
+
+ data = platform_msi_alloc_priv_data(dev, nvec, write_msi_msg);
+ if (IS_ERR(data))
+ return NULL;
+
+ data->host_data = host_data;
+ domain = irq_domain_create_hierarchy(dev->msi_domain, 0, nvec,
+ of_node_to_fwnode(dev->of_node),
+ ops, data);
+ if (!domain)
+ goto free_priv;
- desc = first_msi_entry(dev);
- if (desc) {
- struct platform_msi_priv_data *data;
+ err = msi_domain_prepare_irqs(domain->parent, dev, nvec, &data->arg);
+ if (err)
+ goto free_domain;
+
+ return domain;
- data = desc->platform.msi_priv_data;
+free_domain:
+ irq_domain_remove(domain);
+free_priv:
+ platform_msi_free_priv_data(data);
+ return NULL;
+}
+
+/**
+ * platform_msi_domain_free - Free interrupts associated with a platform-msi
+ * domain
+ *
+ * @domain: The platform-msi domain
+ * @virq: The base irq from which to perform the free operation
+ * @nvec: How many interrupts to free from @virq
+ */
+void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nvec)
+{
+ struct platform_msi_priv_data *data = domain->host_data;
+ struct msi_desc *desc;
+ for_each_msi_entry(desc, data->dev) {
+ if (WARN_ON(!desc->irq || desc->nvec_used != 1))
+ return;
+ if (!(desc->irq >= virq && desc->irq < (virq + nvec)))
+ continue;
- ida_simple_remove(&platform_msi_devid_ida, data->devid);
- kfree(data);
+ irq_domain_free_irqs_common(domain, desc->irq, 1);
}
+}
- msi_domain_free_irqs(dev->msi_domain, dev);
- platform_msi_free_descs(dev);
+/**
+ * platform_msi_domain_alloc - Allocate interrupts associated with
+ * a platform-msi domain
+ *
+ * @domain: The platform-msi domain
+ * @virq: The base irq from which to perform the allocate operation
+ * @nr_irqs:	How many interrupts to allocate from @virq
+ *
+ * Return 0 on success, or an error code on failure. Must be called
+ * with irq_domain_mutex held (which can only be done as part of a
+ * top-level interrupt allocation).
+ */
+int platform_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct platform_msi_priv_data *data = domain->host_data;
+ int err;
+
+ err = platform_msi_alloc_descs_with_irq(data->dev, virq, nr_irqs, data);
+ if (err)
+ return err;
+
+ err = msi_domain_populate_irqs(domain->parent, data->dev,
+ virq, nr_irqs, &data->arg);
+ if (err)
+ platform_msi_domain_free(domain, virq, nr_irqs);
+
+ return err;
}
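Taken together, the additions above give wired-MSI bridge drivers a device-local MSI domain. A hypothetical consumer (the callback, ops and priv names are stand-ins) would look roughly like:

	static void my_write_msg(struct msi_desc *desc, struct msi_msg *msg)
	{
		/* program the device doorbell from msg->address_hi/lo/data */
	}

	static int my_probe(struct device *dev, void *priv)
	{
		struct irq_domain *d;

		d = platform_msi_create_device_domain(dev, 16, my_write_msg,
						      &my_domain_ops, priv);
		if (!d)
			return -ENOMEM;
		/* later, inside my_domain_ops callbacks:
		 *   platform_msi_domain_alloc(d, virq, nr_irqs);
		 *   priv = platform_msi_get_host_data(d);
		 */
		return 0;
	}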
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index e03b1ad25a90..65f50eccd49b 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -390,6 +390,7 @@ static int pm_genpd_runtime_suspend(struct device *dev)
struct generic_pm_domain *genpd;
bool (*stop_ok)(struct device *__dev);
struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
+ bool runtime_pm = pm_runtime_enabled(dev);
ktime_t time_start;
s64 elapsed_ns;
int ret;
@@ -400,12 +401,19 @@ static int pm_genpd_runtime_suspend(struct device *dev)
if (IS_ERR(genpd))
return -EINVAL;
+ /*
+ * A runtime PM centric subsystem/driver may re-use the runtime PM
+ * callbacks for other purposes than runtime PM. In those scenarios
+ * runtime PM is disabled. Under these circumstances, we shall skip
+ * validating/measuring the PM QoS latency.
+ */
stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL;
- if (stop_ok && !stop_ok(dev))
+ if (runtime_pm && stop_ok && !stop_ok(dev))
return -EBUSY;
/* Measure suspend latency. */
- time_start = ktime_get();
+ if (runtime_pm)
+ time_start = ktime_get();
ret = genpd_save_dev(genpd, dev);
if (ret)
@@ -418,13 +426,15 @@ static int pm_genpd_runtime_suspend(struct device *dev)
}
/* Update suspend latency value if the measured time exceeds it. */
- elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
- if (elapsed_ns > td->suspend_latency_ns) {
- td->suspend_latency_ns = elapsed_ns;
- dev_dbg(dev, "suspend latency exceeded, %lld ns\n",
- elapsed_ns);
- genpd->max_off_time_changed = true;
- td->constraint_changed = true;
+ if (runtime_pm) {
+ elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
+ if (elapsed_ns > td->suspend_latency_ns) {
+ td->suspend_latency_ns = elapsed_ns;
+ dev_dbg(dev, "suspend latency exceeded, %lld ns\n",
+ elapsed_ns);
+ genpd->max_off_time_changed = true;
+ td->constraint_changed = true;
+ }
}
/*
@@ -453,6 +463,7 @@ static int pm_genpd_runtime_resume(struct device *dev)
{
struct generic_pm_domain *genpd;
struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
+ bool runtime_pm = pm_runtime_enabled(dev);
ktime_t time_start;
s64 elapsed_ns;
int ret;
@@ -479,14 +490,14 @@ static int pm_genpd_runtime_resume(struct device *dev)
out:
/* Measure resume latency. */
- if (timed)
+ if (timed && runtime_pm)
time_start = ktime_get();
genpd_start_dev(genpd, dev);
genpd_restore_dev(genpd, dev);
/* Update resume latency value if the measured time exceeds it. */
- if (timed) {
+ if (timed && runtime_pm) {
elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
if (elapsed_ns > td->resume_latency_ns) {
td->resume_latency_ns = elapsed_ns;
@@ -1775,10 +1786,10 @@ int genpd_dev_pm_attach(struct device *dev)
}
pd = of_genpd_get_from_provider(&pd_args);
+ of_node_put(pd_args.np);
if (IS_ERR(pd)) {
dev_dbg(dev, "%s() failed to find PM domain: %ld\n",
__func__, PTR_ERR(pd));
- of_node_put(dev->of_node);
return -EPROBE_DEFER;
}
@@ -1796,7 +1807,6 @@ int genpd_dev_pm_attach(struct device *dev)
if (ret < 0) {
dev_err(dev, "failed to add to PM domain %s: %d",
pd->name, ret);
- of_node_put(dev->of_node);
goto out;
}
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index e60dd12e23aa..1e937ac5f456 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -160,9 +160,6 @@ static bool default_power_down_ok(struct dev_pm_domain *pd)
struct gpd_timing_data *td;
s64 constraint_ns;
- if (!pdd->dev->driver)
- continue;
-
/*
* Check if the device is allowed to be off long enough for the
* domain to turn off and on (that's how much time it will
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index eb6e67451dec..0d77cd6fd8d1 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -68,6 +68,9 @@ int dev_pm_set_wake_irq(struct device *dev, int irq)
struct wake_irq *wirq;
int err;
+ if (irq < 0)
+ return -EINVAL;
+
wirq = kzalloc(sizeof(*wirq), GFP_KERNEL);
if (!wirq)
return -ENOMEM;
@@ -167,6 +170,9 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
struct wake_irq *wirq;
int err;
+ if (irq < 0)
+ return -EINVAL;
+
wirq = kzalloc(sizeof(*wirq), GFP_KERNEL);
if (!wirq)
return -ENOMEM;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index a28a562f7b7f..3457ac8c03e2 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3810,7 +3810,6 @@ static int mtip_block_initialize(struct driver_data *dd)
sector_t capacity;
unsigned int index = 0;
struct kobject *kobj;
- unsigned char thd_name[16];
if (dd->disk)
goto skip_create_disk; /* hw init done, before rebuild */
@@ -3958,10 +3957,9 @@ skip_create_disk:
}
start_service_thread:
- sprintf(thd_name, "mtip_svc_thd_%02d", index);
dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread,
- dd, dd->numa_node, "%s",
- thd_name);
+ dd, dd->numa_node,
+ "mtip_svc_thd_%02d", index);
if (IS_ERR(dd->mtip_svc_handler)) {
dev_err(&dd->pdev->dev, "service thread failed to start\n");
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 6255d1c4bba4..09e3c0d87ecc 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>
+#include <linux/lightnvm.h>
struct nullb_cmd {
struct list_head list;
@@ -17,6 +18,7 @@ struct nullb_cmd {
struct bio *bio;
unsigned int tag;
struct nullb_queue *nq;
+ struct hrtimer timer;
};
struct nullb_queue {
@@ -39,23 +41,14 @@ struct nullb {
struct nullb_queue *queues;
unsigned int nr_queues;
+ char disk_name[DISK_NAME_LEN];
};
static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
static int nullb_indexes;
-
-struct completion_queue {
- struct llist_head list;
- struct hrtimer timer;
-};
-
-/*
- * These are per-cpu for now, they will need to be configured by the
- * complete_queues parameter and appropriately mapped.
- */
-static DEFINE_PER_CPU(struct completion_queue, completion_queues);
+static struct kmem_cache *ppa_cache;
enum {
NULL_IRQ_NONE = 0,
@@ -119,6 +112,10 @@ static int nr_devices = 2;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
+static bool use_lightnvm;
+module_param(use_lightnvm, bool, S_IRUGO);
+MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device");
+
static int irqmode = NULL_IRQ_SOFTIRQ;
static int null_set_irqmode(const char *str, const struct kernel_param *kp)
@@ -135,8 +132,8 @@ static const struct kernel_param_ops null_irqmode_param_ops = {
device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
-static int completion_nsec = 10000;
-module_param(completion_nsec, int, S_IRUGO);
+static unsigned long completion_nsec = 10000;
+module_param(completion_nsec, ulong, S_IRUGO);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
static int hw_queue_depth = 64;
@@ -173,6 +170,8 @@ static void free_cmd(struct nullb_cmd *cmd)
put_tag(cmd->nq, cmd->tag);
}
+static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer);
+
static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
{
struct nullb_cmd *cmd;
@@ -183,6 +182,11 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
cmd = &nq->cmds[tag];
cmd->tag = tag;
cmd->nq = nq;
+ if (irqmode == NULL_IRQ_TIMER) {
+ hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ cmd->timer.function = null_cmd_timer_expired;
+ }
return cmd;
}
@@ -213,6 +217,11 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
static void end_cmd(struct nullb_cmd *cmd)
{
+ struct request_queue *q = NULL;
+
+ if (cmd->rq)
+ q = cmd->rq->q;
+
switch (queue_mode) {
case NULL_Q_MQ:
blk_mq_end_request(cmd->rq, 0);
@@ -227,51 +236,29 @@ static void end_cmd(struct nullb_cmd *cmd)
}
free_cmd(cmd);
+
+ /* Restart queue if needed, as we are freeing a tag */
+ if (queue_mode == NULL_Q_RQ && blk_queue_stopped(q)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ blk_start_queue_async(q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+ }
}
static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
{
- struct completion_queue *cq;
- struct llist_node *entry;
- struct nullb_cmd *cmd;
-
- cq = &per_cpu(completion_queues, smp_processor_id());
-
- while ((entry = llist_del_all(&cq->list)) != NULL) {
- entry = llist_reverse_order(entry);
- do {
- struct request_queue *q = NULL;
-
- cmd = container_of(entry, struct nullb_cmd, ll_list);
- entry = entry->next;
- if (cmd->rq)
- q = cmd->rq->q;
- end_cmd(cmd);
-
- if (q && !q->mq_ops && blk_queue_stopped(q)) {
- spin_lock(q->queue_lock);
- if (blk_queue_stopped(q))
- blk_start_queue(q);
- spin_unlock(q->queue_lock);
- }
- } while (entry);
- }
+ end_cmd(container_of(timer, struct nullb_cmd, timer));
return HRTIMER_NORESTART;
}
static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
- struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());
+ ktime_t kt = ktime_set(0, completion_nsec);
- cmd->ll_list.next = NULL;
- if (llist_add(&cmd->ll_list, &cq->list)) {
- ktime_t kt = ktime_set(0, completion_nsec);
-
- hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL_PINNED);
- }
-
- put_cpu();
+ hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
}
static void null_softirq_done_fn(struct request *rq)
@@ -369,6 +356,10 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
{
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+ if (irqmode == NULL_IRQ_TIMER) {
+ hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ cmd->timer.function = null_cmd_timer_expired;
+ }
cmd->rq = bd->rq;
cmd->nq = hctx->driver_data;
@@ -427,15 +418,157 @@ static void null_del_dev(struct nullb *nullb)
{
list_del_init(&nullb->list);
- del_gendisk(nullb->disk);
+ if (use_lightnvm)
+ nvm_unregister(nullb->disk_name);
+ else
+ del_gendisk(nullb->disk);
blk_cleanup_queue(nullb->q);
if (queue_mode == NULL_Q_MQ)
blk_mq_free_tag_set(&nullb->tag_set);
- put_disk(nullb->disk);
+ if (!use_lightnvm)
+ put_disk(nullb->disk);
cleanup_queues(nullb);
kfree(nullb);
}
+#ifdef CONFIG_NVM
+
+static void null_lnvm_end_io(struct request *rq, int error)
+{
+ struct nvm_rq *rqd = rq->end_io_data;
+ struct nvm_dev *dev = rqd->dev;
+
+ dev->mt->end_io(rqd, error);
+
+ blk_put_request(rq);
+}
+
+static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
+{
+ struct request_queue *q = dev->q;
+ struct request *rq;
+ struct bio *bio = rqd->bio;
+
+ rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
+ if (IS_ERR(rq))
+ return -ENOMEM;
+
+ rq->cmd_type = REQ_TYPE_DRV_PRIV;
+ rq->__sector = bio->bi_iter.bi_sector;
+ rq->ioprio = bio_prio(bio);
+
+ if (bio_has_data(bio))
+ rq->nr_phys_segments = bio_phys_segments(q, bio);
+
+ rq->__data_len = bio->bi_iter.bi_size;
+ rq->bio = rq->biotail = bio;
+
+ rq->end_io_data = rqd;
+
+ blk_execute_rq_nowait(q, NULL, rq, 0, null_lnvm_end_io);
+
+ return 0;
+}
+
+static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
+{
+ sector_t size = gb * 1024 * 1024 * 1024ULL;
+ sector_t blksize;
+ struct nvm_id_group *grp;
+
+ id->ver_id = 0x1;
+ id->vmnt = 0;
+ id->cgrps = 1;
+ id->cap = 0x3;
+ id->dom = 0x1;
+
+ id->ppaf.blk_offset = 0;
+ id->ppaf.blk_len = 16;
+ id->ppaf.pg_offset = 16;
+ id->ppaf.pg_len = 16;
+ id->ppaf.sect_offset = 32;
+ id->ppaf.sect_len = 8;
+ id->ppaf.pln_offset = 40;
+ id->ppaf.pln_len = 8;
+ id->ppaf.lun_offset = 48;
+ id->ppaf.lun_len = 8;
+ id->ppaf.ch_offset = 56;
+ id->ppaf.ch_len = 8;
+
+ do_div(size, bs); /* convert size to pages */
+	do_div(size, 256);		/* convert size to pages per block */
+ grp = &id->groups[0];
+ grp->mtype = 0;
+ grp->fmtype = 0;
+ grp->num_ch = 1;
+ grp->num_pg = 256;
+ blksize = size;
+ do_div(size, (1 << 16));
+ grp->num_lun = size + 1;
+ do_div(blksize, grp->num_lun);
+ grp->num_blk = blksize;
+ grp->num_pln = 1;
+
+ grp->fpg_sz = bs;
+ grp->csecs = bs;
+ grp->trdt = 25000;
+ grp->trdm = 25000;
+ grp->tprt = 500000;
+ grp->tprm = 500000;
+ grp->tbet = 1500000;
+ grp->tbem = 1500000;
+ grp->mpos = 0x010101; /* single plane rwe */
+ grp->cpar = hw_queue_depth;
+
+ return 0;
+}
+
+static void *null_lnvm_create_dma_pool(struct nvm_dev *dev, char *name)
+{
+ mempool_t *virtmem_pool;
+
+ virtmem_pool = mempool_create_slab_pool(64, ppa_cache);
+ if (!virtmem_pool) {
+ pr_err("null_blk: Unable to create virtual memory pool\n");
+ return NULL;
+ }
+
+ return virtmem_pool;
+}
+
+static void null_lnvm_destroy_dma_pool(void *pool)
+{
+ mempool_destroy(pool);
+}
+
+static void *null_lnvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
+ gfp_t mem_flags, dma_addr_t *dma_handler)
+{
+ return mempool_alloc(pool, mem_flags);
+}
+
+static void null_lnvm_dev_dma_free(void *pool, void *entry,
+ dma_addr_t dma_handler)
+{
+ mempool_free(entry, pool);
+}
+
+static struct nvm_dev_ops null_lnvm_dev_ops = {
+ .identity = null_lnvm_id,
+ .submit_io = null_lnvm_submit_io,
+
+ .create_dma_pool = null_lnvm_create_dma_pool,
+ .destroy_dma_pool = null_lnvm_destroy_dma_pool,
+ .dev_dma_alloc = null_lnvm_dev_dma_alloc,
+ .dev_dma_free = null_lnvm_dev_dma_free,
+
+	/* Simulate NVMe protocol restriction */
+ .max_phys_sect = 64,
+};
+#else
+static struct nvm_dev_ops null_lnvm_dev_ops;
+#endif /* CONFIG_NVM */
+
static int null_open(struct block_device *bdev, fmode_t mode)
{
return 0;
@@ -575,11 +708,6 @@ static int null_add_dev(void)
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);
- disk = nullb->disk = alloc_disk_node(1, home_node);
- if (!disk) {
- rv = -ENOMEM;
- goto out_cleanup_blk_queue;
- }
mutex_lock(&lock);
list_add_tail(&nullb->list, &nullb_list);
@@ -589,6 +717,21 @@ static int null_add_dev(void)
blk_queue_logical_block_size(nullb->q, bs);
blk_queue_physical_block_size(nullb->q, bs);
+ sprintf(nullb->disk_name, "nullb%d", nullb->index);
+
+ if (use_lightnvm) {
+ rv = nvm_register(nullb->q, nullb->disk_name,
+ &null_lnvm_dev_ops);
+ if (rv)
+ goto out_cleanup_blk_queue;
+ goto done;
+ }
+
+ disk = nullb->disk = alloc_disk_node(1, home_node);
+ if (!disk) {
+ rv = -ENOMEM;
+ goto out_cleanup_lightnvm;
+ }
size = gb * 1024 * 1024 * 1024ULL;
set_capacity(disk, size >> 9);
@@ -598,10 +741,15 @@ static int null_add_dev(void)
disk->fops = &null_fops;
disk->private_data = nullb;
disk->queue = nullb->q;
- sprintf(disk->disk_name, "nullb%d", nullb->index);
+ strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
+
add_disk(disk);
+done:
return 0;
+out_cleanup_lightnvm:
+ if (use_lightnvm)
+ nvm_unregister(nullb->disk_name);
out_cleanup_blk_queue:
blk_cleanup_queue(nullb->q);
out_cleanup_tags:
@@ -617,7 +765,9 @@ out:
static int __init null_init(void)
{
+ int ret = 0;
unsigned int i;
+ struct nullb *nullb;
if (bs > PAGE_SIZE) {
pr_warn("null_blk: invalid block size\n");
@@ -625,6 +775,18 @@ static int __init null_init(void)
bs = PAGE_SIZE;
}
+ if (use_lightnvm && bs != 4096) {
+ pr_warn("null_blk: LightNVM only supports 4k block size\n");
+		pr_warn("null_blk: defaulting block size to 4k\n");
+ bs = 4096;
+ }
+
+ if (use_lightnvm && queue_mode != NULL_Q_MQ) {
+		pr_warn("null_blk: LightNVM is only supported for blk-mq\n");
+		pr_warn("null_blk: defaulting queue mode to blk-mq\n");
+ queue_mode = NULL_Q_MQ;
+ }
+
if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
if (submit_queues < nr_online_nodes) {
pr_warn("null_blk: submit_queues param is set to %u.",
@@ -638,32 +800,38 @@ static int __init null_init(void)
mutex_init(&lock);
- /* Initialize a separate list for each CPU for issuing softirqs */
- for_each_possible_cpu(i) {
- struct completion_queue *cq = &per_cpu(completion_queues, i);
-
- init_llist_head(&cq->list);
-
- if (irqmode != NULL_IRQ_TIMER)
- continue;
-
- hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- cq->timer.function = null_cmd_timer_expired;
- }
-
null_major = register_blkdev(0, "nullb");
if (null_major < 0)
return null_major;
- for (i = 0; i < nr_devices; i++) {
- if (null_add_dev()) {
- unregister_blkdev(null_major, "nullb");
- return -EINVAL;
+ if (use_lightnvm) {
+ ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
+ 0, 0, NULL);
+ if (!ppa_cache) {
+ pr_err("null_blk: unable to create ppa cache\n");
+ ret = -ENOMEM;
+ goto err_ppa;
}
}
+ for (i = 0; i < nr_devices; i++) {
+ ret = null_add_dev();
+ if (ret)
+ goto err_dev;
+ }
+
pr_info("null: module loaded\n");
return 0;
+
+err_dev:
+ while (!list_empty(&nullb_list)) {
+ nullb = list_entry(nullb_list.next, struct nullb, list);
+ null_del_dev(nullb);
+ }
+ kmem_cache_destroy(ppa_cache);
+err_ppa:
+ unregister_blkdev(null_major, "nullb");
+ return ret;
}
static void __exit null_exit(void)
@@ -678,6 +846,8 @@ static void __exit null_exit(void)
null_del_dev(nullb);
}
mutex_unlock(&lock);
+
+ kmem_cache_destroy(ppa_cache);
}
module_init(null_init);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 235708c7c46e..81ea69fee7ca 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3442,6 +3442,7 @@ static void rbd_queue_workfn(struct work_struct *work)
goto err_rq;
}
img_request->rq = rq;
+ snapc = NULL; /* img_request consumes a ref */
if (op_type == OBJ_OP_DISCARD)
result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA,
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index f9099940c272..41fb1a917b17 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -950,6 +950,8 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
goto unmap;
for (n = 0, i = 0; n < nseg; n++) {
+ uint8_t first_sect, last_sect;
+
if ((n % SEGS_PER_INDIRECT_FRAME) == 0) {
/* Map indirect segments */
if (segments)
@@ -957,15 +959,18 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]->page);
}
i = n % SEGS_PER_INDIRECT_FRAME;
+
pending_req->segments[n]->gref = segments[i].gref;
- seg[n].nsec = segments[i].last_sect -
- segments[i].first_sect + 1;
- seg[n].offset = (segments[i].first_sect << 9);
- if ((segments[i].last_sect >= (XEN_PAGE_SIZE >> 9)) ||
- (segments[i].last_sect < segments[i].first_sect)) {
+
+ first_sect = READ_ONCE(segments[i].first_sect);
+ last_sect = READ_ONCE(segments[i].last_sect);
+ if (last_sect >= (XEN_PAGE_SIZE >> 9) || last_sect < first_sect) {
rc = -EINVAL;
goto unmap;
}
+
+ seg[n].nsec = last_sect - first_sect + 1;
+ seg[n].offset = first_sect << 9;
preq->nr_sects += seg[n].nsec;
}
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 68e87a037b99..c929ae22764c 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -408,8 +408,8 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
struct blkif_x86_32_request *src)
{
int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
- dst->operation = src->operation;
- switch (src->operation) {
+ dst->operation = READ_ONCE(src->operation);
+ switch (dst->operation) {
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
case BLKIF_OP_WRITE_BARRIER:
@@ -456,8 +456,8 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
struct blkif_x86_64_request *src)
{
int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
- dst->operation = src->operation;
- switch (src->operation) {
+ dst->operation = READ_ONCE(src->operation);
+ switch (dst->operation) {
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
case BLKIF_OP_WRITE_BARRIER:
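Both xen-blkback hunks close the same TOCTOU window: a field in guest-shared ring memory can change between validation and use unless it is fetched exactly once. The defence, condensed (seg here is a pointer into the shared page):

	first_sect = READ_ONCE(seg->first_sect);
	last_sect  = READ_ONCE(seg->last_sect);
	if (last_sect >= (XEN_PAGE_SIZE >> 9) || last_sect < first_sect)
		return -EINVAL;	/* the guest cannot alter the checked values */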
diff --git a/drivers/bus/omap-ocp2scp.c b/drivers/bus/omap-ocp2scp.c
index 9f1856948758..bf500e0e7362 100644
--- a/drivers/bus/omap-ocp2scp.c
+++ b/drivers/bus/omap-ocp2scp.c
@@ -117,7 +117,7 @@ static struct platform_driver omap_ocp2scp_driver = {
module_platform_driver(omap_ocp2scp_driver);
-MODULE_ALIAS("platform: omap-ocp2scp");
+MODULE_ALIAS("platform:omap-ocp2scp");
MODULE_AUTHOR("Texas Instruments Inc.");
MODULE_DESCRIPTION("OMAP OCP2SCP driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c
index 846bc29c157d..25996e256110 100644
--- a/drivers/bus/sunxi-rsb.c
+++ b/drivers/bus/sunxi-rsb.c
@@ -342,13 +342,13 @@ static int sunxi_rsb_read(struct sunxi_rsb *rsb, u8 rtaddr, u8 addr,
ret = _sunxi_rsb_run_xfer(rsb);
if (ret)
- goto out;
+ goto unlock;
*buf = readl(rsb->regs + RSB_DATA);
+unlock:
mutex_unlock(&rsb->lock);
-out:
return ret;
}
@@ -527,9 +527,9 @@ static int sunxi_rsb_init_device_mode(struct sunxi_rsb *rsb)
*/
static const struct sunxi_rsb_addr_map sunxi_rsb_addr_maps[] = {
- { 0x3e3, 0x2d }, /* Primary PMIC: AXP223, AXP809, AXP81X, ... */
+ { 0x3a3, 0x2d }, /* Primary PMIC: AXP223, AXP809, AXP81X, ... */
{ 0x745, 0x3a }, /* Secondary PMIC: AXP806, ... */
- { 0xe89, 0x45 }, /* Peripheral IC: AC100, ... */
+ { 0xe89, 0x4e }, /* Peripheral IC: AC100, ... */
};
static u8 sunxi_rsb_get_rtaddr(u16 hwaddr)
diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
index 0c98a9d51a24..44ce80606944 100644
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -140,7 +140,7 @@ static int via_rng_init(struct hwrng *rng)
* RNG configuration like it used to be the case in this
* register */
if ((c->x86 == 6) && (c->x86_model >= 0x0f)) {
- if (!cpu_has_xstore_enabled) {
+ if (!boot_cpu_has(X86_FEATURE_XSTORE_EN)) {
pr_err(PFX "can't enable hardware RNG "
"if XSTORE is not enabled\n");
return -ENODEV;
@@ -200,8 +200,9 @@ static int __init mod_init(void)
{
int err;
- if (!cpu_has_xstore)
+ if (!boot_cpu_has(X86_FEATURE_XSTORE))
return -ENODEV;
+
pr_info("VIA RNG detected\n");
err = hwrng_register(&via_rng);
if (err) {
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 654f6f36a071..4cc72fa017c7 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -412,18 +412,42 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info)
return rv;
}
-static void start_check_enables(struct smi_info *smi_info)
+static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
+{
+ smi_info->last_timeout_jiffies = jiffies;
+ mod_timer(&smi_info->si_timer, new_val);
+ smi_info->timer_running = true;
+}
+
+/*
+ * Start a new message and (re)start the timer and thread.
+ */
+static void start_new_msg(struct smi_info *smi_info, unsigned char *msg,
+ unsigned int size)
+{
+ smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_JIFFIES);
+
+ if (smi_info->thread)
+ wake_up_process(smi_info->thread);
+
+ smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
+}
+
+static void start_check_enables(struct smi_info *smi_info, bool start_timer)
{
unsigned char msg[2];
msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
- smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
+ if (start_timer)
+ start_new_msg(smi_info, msg, 2);
+ else
+ smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
smi_info->si_state = SI_CHECKING_ENABLES;
}
-static void start_clear_flags(struct smi_info *smi_info)
+static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
{
unsigned char msg[3];
@@ -432,7 +456,10 @@ static void start_clear_flags(struct smi_info *smi_info)
msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
msg[2] = WDT_PRE_TIMEOUT_INT;
- smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
+ if (start_timer)
+ start_new_msg(smi_info, msg, 3);
+ else
+ smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
smi_info->si_state = SI_CLEARING_FLAGS;
}
@@ -442,10 +469,8 @@ static void start_getting_msg_queue(struct smi_info *smi_info)
smi_info->curr_msg->data[1] = IPMI_GET_MSG_CMD;
smi_info->curr_msg->data_size = 2;
- smi_info->handlers->start_transaction(
- smi_info->si_sm,
- smi_info->curr_msg->data,
- smi_info->curr_msg->data_size);
+ start_new_msg(smi_info, smi_info->curr_msg->data,
+ smi_info->curr_msg->data_size);
smi_info->si_state = SI_GETTING_MESSAGES;
}
@@ -455,20 +480,11 @@ static void start_getting_events(struct smi_info *smi_info)
smi_info->curr_msg->data[1] = IPMI_READ_EVENT_MSG_BUFFER_CMD;
smi_info->curr_msg->data_size = 2;
- smi_info->handlers->start_transaction(
- smi_info->si_sm,
- smi_info->curr_msg->data,
- smi_info->curr_msg->data_size);
+ start_new_msg(smi_info, smi_info->curr_msg->data,
+ smi_info->curr_msg->data_size);
smi_info->si_state = SI_GETTING_EVENTS;
}
-static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
-{
- smi_info->last_timeout_jiffies = jiffies;
- mod_timer(&smi_info->si_timer, new_val);
- smi_info->timer_running = true;
-}
-
/*
* When we have a situation where we run out of memory and cannot
* allocate messages, we just leave them in the BMC and run the system
@@ -478,11 +494,11 @@ static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
* Note that we cannot just use disable_irq(), since the interrupt may
* be shared.
*/
-static inline bool disable_si_irq(struct smi_info *smi_info)
+static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer)
{
if ((smi_info->irq) && (!smi_info->interrupt_disabled)) {
smi_info->interrupt_disabled = true;
- start_check_enables(smi_info);
+ start_check_enables(smi_info, start_timer);
return true;
}
return false;
@@ -492,7 +508,7 @@ static inline bool enable_si_irq(struct smi_info *smi_info)
{
if ((smi_info->irq) && (smi_info->interrupt_disabled)) {
smi_info->interrupt_disabled = false;
- start_check_enables(smi_info);
+ start_check_enables(smi_info, true);
return true;
}
return false;
@@ -510,7 +526,7 @@ static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info)
msg = ipmi_alloc_smi_msg();
if (!msg) {
- if (!disable_si_irq(smi_info))
+ if (!disable_si_irq(smi_info, true))
smi_info->si_state = SI_NORMAL;
} else if (enable_si_irq(smi_info)) {
ipmi_free_smi_msg(msg);
@@ -526,7 +542,7 @@ static void handle_flags(struct smi_info *smi_info)
/* Watchdog pre-timeout */
smi_inc_stat(smi_info, watchdog_pretimeouts);
- start_clear_flags(smi_info);
+ start_clear_flags(smi_info, true);
smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
if (smi_info->intf)
ipmi_smi_watchdog_pretimeout(smi_info->intf);
@@ -879,8 +895,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info,
msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
msg[1] = IPMI_GET_MSG_FLAGS_CMD;
- smi_info->handlers->start_transaction(
- smi_info->si_sm, msg, 2);
+ start_new_msg(smi_info, msg, 2);
smi_info->si_state = SI_GETTING_FLAGS;
goto restart;
}
@@ -910,7 +925,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info,
* disable and messages disabled.
*/
if (smi_info->supports_event_msg_buff || smi_info->irq) {
- start_check_enables(smi_info);
+ start_check_enables(smi_info, true);
} else {
smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
if (!smi_info->curr_msg)
@@ -920,6 +935,13 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info,
}
goto restart;
}
+
+ if (si_sm_result == SI_SM_IDLE && smi_info->timer_running) {
+		/* OK if it fails; the timer will just go off. */
+ if (del_timer(&smi_info->si_timer))
+ smi_info->timer_running = false;
+ }
+
out:
return si_sm_result;
}
@@ -1208,14 +1230,14 @@ static int smi_start_processing(void *send_info,
new_smi->intf = intf;
- /* Try to claim any interrupts. */
- if (new_smi->irq_setup)
- new_smi->irq_setup(new_smi);
-
/* Set up the timer that drives the interface. */
setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi);
smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES);
+ /* Try to claim any interrupts. */
+ if (new_smi->irq_setup)
+ new_smi->irq_setup(new_smi);
+
/*
* Check if the user forcefully enabled the daemon.
*/
@@ -2560,6 +2582,7 @@ static const struct of_device_id of_ipmi_match[] = {
.data = (void *)(unsigned long) SI_BT },
{},
};
+MODULE_DEVICE_TABLE(of, of_ipmi_match);
static int of_ipmi_probe(struct platform_device *dev)
{
@@ -2646,7 +2669,6 @@ static int of_ipmi_probe(struct platform_device *dev)
}
return 0;
}
-MODULE_DEVICE_TABLE(of, of_ipmi_match);
#else
#define of_ipmi_match NULL
static int of_ipmi_probe(struct platform_device *dev)
@@ -3613,7 +3635,7 @@ static int try_smi_init(struct smi_info *new_smi)
* Start clearing the flags before we enable interrupts or the
* timer to avoid racing with the timer.
*/
- start_clear_flags(new_smi);
+ start_clear_flags(new_smi, false);
/*
* IRQ is defined to be set when non-zero. req_events will
@@ -3908,7 +3930,7 @@ static void cleanup_one_si(struct smi_info *to_clean)
poll(to_clean);
schedule_timeout_uninterruptible(1);
}
- disable_si_irq(to_clean);
+ disable_si_irq(to_clean, false);
while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
poll(to_clean);
schedule_timeout_uninterruptible(1);
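Two threads run through the ipmi_si diff: every transaction now starts via start_new_msg(), which (re)arms the timer and wakes the worker thread so a lost interrupt cannot strand a message, and smi_start_processing() arms that timer before claiming the IRQ. The ordering, condensed — an interrupt firing the instant the handler is registered must find the timer machinery already initialised:

	setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi);
	smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES);

	/* only after the timer is live: */
	if (new_smi->irq_setup)
		new_smi->irq_setup(new_smi);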
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 0ac3bd1a5497..096f0cef4da1 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -153,6 +153,9 @@ static int timeout = 10;
/* The pre-timeout is disabled by default. */
static int pretimeout;
+/* Default timeout to set on panic */
+static int panic_wdt_timeout = 255;
+
/* Default action is to reset the board on a timeout. */
static unsigned char action_val = WDOG_TIMEOUT_RESET;
@@ -293,6 +296,9 @@ MODULE_PARM_DESC(timeout, "Timeout value in seconds.");
module_param(pretimeout, timeout, 0644);
MODULE_PARM_DESC(pretimeout, "Pretimeout value in seconds.");
+module_param(panic_wdt_timeout, timeout, 0644);
+MODULE_PARM_DESC(panic_wdt_timeout, "Timeout value on kernel panic in seconds.");
+
module_param_cb(action, &param_ops_str, action_op, 0644);
MODULE_PARM_DESC(action, "Timeout action. One of: "
"reset, none, power_cycle, power_off.");
@@ -1189,7 +1195,7 @@ static int wdog_panic_handler(struct notifier_block *this,
/* Make sure we do this only once. */
panic_event_handled = 1;
- timeout = 255;
+ timeout = panic_wdt_timeout;
pretimeout = 0;
panic_halt_ipmi_set_timeout();
}
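
[Note on the hunk above: the second argument of module_param() is a parameter-type name rather than a C type, and this file defines a custom "timeout" type so that writes to the sysfs node go through a setter. A sketch of how such a type is typically wired up, assuming the file's existing set_param_timeout() helper:

	static const struct kernel_param_ops param_ops_timeout = {
		.set = set_param_timeout,	/* parse and apply the new value */
		.get = param_get_int,
	};
	#define param_check_timeout param_check_int

With that in place, the panic handler simply substitutes the configurable panic_wdt_timeout for the previous hard-coded 255 seconds.]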
diff --git a/drivers/clk/clk-gpio.c b/drivers/clk/clk-gpio.c
index 10819e248414..335322dc403f 100644
--- a/drivers/clk/clk-gpio.c
+++ b/drivers/clk/clk-gpio.c
@@ -209,6 +209,8 @@ EXPORT_SYMBOL_GPL(clk_register_gpio_mux);
struct clk_gpio_delayed_register_data {
const char *gpio_name;
+ int num_parents;
+ const char **parent_names;
struct device_node *node;
struct mutex lock;
struct clk *clk;
@@ -222,8 +224,6 @@ static struct clk *of_clk_gpio_delayed_register_get(
{
struct clk_gpio_delayed_register_data *data = _data;
struct clk *clk;
- const char **parent_names;
- int i, num_parents;
int gpio;
enum of_gpio_flags of_flags;
@@ -248,26 +248,14 @@ static struct clk *of_clk_gpio_delayed_register_get(
return ERR_PTR(gpio);
}
- num_parents = of_clk_get_parent_count(data->node);
-
- parent_names = kcalloc(num_parents, sizeof(char *), GFP_KERNEL);
- if (!parent_names) {
- clk = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- for (i = 0; i < num_parents; i++)
- parent_names[i] = of_clk_get_parent_name(data->node, i);
-
- clk = data->clk_register_get(data->node->name, parent_names,
- num_parents, gpio, of_flags & OF_GPIO_ACTIVE_LOW);
+ clk = data->clk_register_get(data->node->name, data->parent_names,
+ data->num_parents, gpio, of_flags & OF_GPIO_ACTIVE_LOW);
if (IS_ERR(clk))
goto out;
data->clk = clk;
out:
mutex_unlock(&data->lock);
- kfree(parent_names);
return clk;
}
@@ -296,11 +284,24 @@ static void __init of_gpio_clk_setup(struct device_node *node,
unsigned gpio, bool active_low))
{
struct clk_gpio_delayed_register_data *data;
+ const char **parent_names;
+ int i, num_parents;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return;
+ num_parents = of_clk_get_parent_count(node);
+
+ parent_names = kcalloc(num_parents, sizeof(char *), GFP_KERNEL);
+ if (!parent_names)
+ return;
+
+ for (i = 0; i < num_parents; i++)
+ parent_names[i] = of_clk_get_parent_name(node, i);
+
+ data->num_parents = num_parents;
+ data->parent_names = parent_names;
data->node = node;
data->gpio_name = gpio_name;
data->clk_register_get = clk_register_get;
diff --git a/drivers/clk/clk-qoriq.c b/drivers/clk/clk-qoriq.c
index 1ab0fb81c6a0..7bc1c4527ae4 100644
--- a/drivers/clk/clk-qoriq.c
+++ b/drivers/clk/clk-qoriq.c
@@ -778,8 +778,10 @@ static struct clk * __init create_one_cmux(struct clockgen *cg, int idx)
*/
clksel = (cg_in(cg, hwc->reg) & CLKSEL_MASK) >> CLKSEL_SHIFT;
div = get_pll_div(cg, hwc, clksel);
- if (!div)
+ if (!div) {
+ kfree(hwc);
return NULL;
+ }
pct80_rate = clk_get_rate(div->clk);
pct80_rate *= 8;
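
[Note: the two added lines fix a small leak in create_one_cmux(): hwc is allocated earlier in the function, and the early return taken when no PLL divider matches dropped the only reference to it, so the error path now frees it first.]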
diff --git a/drivers/clk/clk-scpi.c b/drivers/clk/clk-scpi.c
index 0b501a9fef92..cd0f2726f5e0 100644
--- a/drivers/clk/clk-scpi.c
+++ b/drivers/clk/clk-scpi.c
@@ -292,6 +292,7 @@ static int scpi_clocks_probe(struct platform_device *pdev)
ret = scpi_clk_add(dev, child, match);
if (ret) {
scpi_clocks_remove(pdev);
+ of_node_put(child);
return ret;
}
}
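
[Note: this one-line clk-scpi fix follows the device-tree iterator rule: for_each_available_child_of_node() takes a reference on the current child and drops it when advancing to the next, so any early exit from the loop must drop the reference by hand. A minimal sketch of the pattern, with illustrative names:

	struct device_node *child;
	int ret;

	for_each_available_child_of_node(np, child) {
		ret = register_child(child);
		if (ret) {
			of_node_put(child);	/* balance the iterator's reference */
			return ret;
		}
	}
]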
diff --git a/drivers/clk/imx/clk-pllv1.c b/drivers/clk/imx/clk-pllv1.c
index 8564e4342c7d..82fe3662b5f6 100644
--- a/drivers/clk/imx/clk-pllv1.c
+++ b/drivers/clk/imx/clk-pllv1.c
@@ -52,7 +52,7 @@ static unsigned long clk_pllv1_recalc_rate(struct clk_hw *hw,
unsigned long parent_rate)
{
struct clk_pllv1 *pll = to_clk_pllv1(hw);
- long long ll;
+ unsigned long long ull;
int mfn_abs;
unsigned int mfi, mfn, mfd, pd;
u32 reg;
@@ -94,16 +94,16 @@ static unsigned long clk_pllv1_recalc_rate(struct clk_hw *hw,
rate = parent_rate * 2;
rate /= pd + 1;
- ll = (unsigned long long)rate * mfn_abs;
+ ull = (unsigned long long)rate * mfn_abs;
- do_div(ll, mfd + 1);
+ do_div(ull, mfd + 1);
if (mfn_is_negative(pll, mfn))
- ll = -ll;
+ ull = (rate * mfi) - ull;
+ else
+ ull = (rate * mfi) + ull;
- ll = (rate * mfi) + ll;
-
- return ll;
+ return ull;
}
static struct clk_ops clk_pllv1_ops = {
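
[Note: the clk-pllv1 conversion above (and the clk-pllv2 one below) rests on the fact that do_div() divides its first argument in place as an unsigned 64-bit value, so a negative intermediate silently computes garbage. The code therefore keeps the arithmetic unsigned and applies the sign last; roughly, with illustrative names:

	u64 frac = (u64)rate * mfn_abs;	/* magnitude of the fractional term */
	do_div(frac, mfd + 1);		/* frac /= mfd + 1, in place */
	return mfn_negative ? base - frac : base + frac;
]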
diff --git a/drivers/clk/imx/clk-pllv2.c b/drivers/clk/imx/clk-pllv2.c
index b18f875eac6a..4aeda56ce372 100644
--- a/drivers/clk/imx/clk-pllv2.c
+++ b/drivers/clk/imx/clk-pllv2.c
@@ -79,7 +79,7 @@ static unsigned long __clk_pllv2_recalc_rate(unsigned long parent_rate,
{
long mfi, mfn, mfd, pdf, ref_clk;
unsigned long dbl;
- s64 temp;
+ u64 temp;
dbl = dp_ctl & MXC_PLL_DP_CTL_DPDCK0_2_EN;
@@ -98,8 +98,9 @@ static unsigned long __clk_pllv2_recalc_rate(unsigned long parent_rate,
temp = (u64) ref_clk * abs(mfn);
do_div(temp, mfd + 1);
if (mfn < 0)
- temp = -temp;
- temp = (ref_clk * mfi) + temp;
+ temp = (ref_clk * mfi) - temp;
+ else
+ temp = (ref_clk * mfi) + temp;
return temp;
}
@@ -126,7 +127,7 @@ static int __clk_pllv2_set_rate(unsigned long rate, unsigned long parent_rate,
{
u32 reg;
long mfi, pdf, mfn, mfd = 999999;
- s64 temp64;
+ u64 temp64;
unsigned long quad_parent_rate;
quad_parent_rate = 4 * parent_rate;
diff --git a/drivers/clk/imx/clk-vf610.c b/drivers/clk/imx/clk-vf610.c
index d1b1c95177bb..0a94d9661d91 100644
--- a/drivers/clk/imx/clk-vf610.c
+++ b/drivers/clk/imx/clk-vf610.c
@@ -335,22 +335,22 @@ static void __init vf610_clocks_init(struct device_node *ccm_node)
clk[VF610_CLK_SAI0_SEL] = imx_clk_mux("sai0_sel", CCM_CSCMR1, 0, 2, sai_sels, 4);
clk[VF610_CLK_SAI0_EN] = imx_clk_gate("sai0_en", "sai0_sel", CCM_CSCDR1, 16);
clk[VF610_CLK_SAI0_DIV] = imx_clk_divider("sai0_div", "sai0_en", CCM_CSCDR1, 0, 4);
- clk[VF610_CLK_SAI0] = imx_clk_gate2("sai0", "sai0_div", CCM_CCGR0, CCM_CCGRx_CGn(15));
+ clk[VF610_CLK_SAI0] = imx_clk_gate2("sai0", "ipg_bus", CCM_CCGR0, CCM_CCGRx_CGn(15));
clk[VF610_CLK_SAI1_SEL] = imx_clk_mux("sai1_sel", CCM_CSCMR1, 2, 2, sai_sels, 4);
clk[VF610_CLK_SAI1_EN] = imx_clk_gate("sai1_en", "sai1_sel", CCM_CSCDR1, 17);
clk[VF610_CLK_SAI1_DIV] = imx_clk_divider("sai1_div", "sai1_en", CCM_CSCDR1, 4, 4);
- clk[VF610_CLK_SAI1] = imx_clk_gate2("sai1", "sai1_div", CCM_CCGR1, CCM_CCGRx_CGn(0));
+ clk[VF610_CLK_SAI1] = imx_clk_gate2("sai1", "ipg_bus", CCM_CCGR1, CCM_CCGRx_CGn(0));
clk[VF610_CLK_SAI2_SEL] = imx_clk_mux("sai2_sel", CCM_CSCMR1, 4, 2, sai_sels, 4);
clk[VF610_CLK_SAI2_EN] = imx_clk_gate("sai2_en", "sai2_sel", CCM_CSCDR1, 18);
clk[VF610_CLK_SAI2_DIV] = imx_clk_divider("sai2_div", "sai2_en", CCM_CSCDR1, 8, 4);
- clk[VF610_CLK_SAI2] = imx_clk_gate2("sai2", "sai2_div", CCM_CCGR1, CCM_CCGRx_CGn(1));
+ clk[VF610_CLK_SAI2] = imx_clk_gate2("sai2", "ipg_bus", CCM_CCGR1, CCM_CCGRx_CGn(1));
clk[VF610_CLK_SAI3_SEL] = imx_clk_mux("sai3_sel", CCM_CSCMR1, 6, 2, sai_sels, 4);
clk[VF610_CLK_SAI3_EN] = imx_clk_gate("sai3_en", "sai3_sel", CCM_CSCDR1, 19);
clk[VF610_CLK_SAI3_DIV] = imx_clk_divider("sai3_div", "sai3_en", CCM_CSCDR1, 12, 4);
- clk[VF610_CLK_SAI3] = imx_clk_gate2("sai3", "sai3_div", CCM_CCGR1, CCM_CCGRx_CGn(2));
+ clk[VF610_CLK_SAI3] = imx_clk_gate2("sai3", "ipg_bus", CCM_CCGR1, CCM_CCGRx_CGn(2));
clk[VF610_CLK_NFC_SEL] = imx_clk_mux("nfc_sel", CCM_CSCMR1, 12, 2, nfc_sels, 4);
clk[VF610_CLK_NFC_EN] = imx_clk_gate("nfc_en", "nfc_sel", CCM_CSCDR2, 9);
diff --git a/drivers/clk/mmp/clk-mmp2.c b/drivers/clk/mmp/clk-mmp2.c
index 09d2832fbd78..71fd29348f28 100644
--- a/drivers/clk/mmp/clk-mmp2.c
+++ b/drivers/clk/mmp/clk-mmp2.c
@@ -9,6 +9,7 @@
* warranty of any kind, whether express or implied.
*/
+#include <linux/clk.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
diff --git a/drivers/clk/mmp/clk-pxa168.c b/drivers/clk/mmp/clk-pxa168.c
index 93e967c0f972..75244915df05 100644
--- a/drivers/clk/mmp/clk-pxa168.c
+++ b/drivers/clk/mmp/clk-pxa168.c
@@ -9,6 +9,7 @@
* warranty of any kind, whether express or implied.
*/
+#include <linux/clk.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
diff --git a/drivers/clk/mmp/clk-pxa910.c b/drivers/clk/mmp/clk-pxa910.c
index 993abcdb32cc..37ba04ba1368 100644
--- a/drivers/clk/mmp/clk-pxa910.c
+++ b/drivers/clk/mmp/clk-pxa910.c
@@ -9,6 +9,7 @@
* warranty of any kind, whether express or implied.
*/
+#include <linux/clk.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
diff --git a/drivers/clk/sunxi/clk-a10-pll2.c b/drivers/clk/sunxi/clk-a10-pll2.c
index 5484c31ec568..0ee1f363e4be 100644
--- a/drivers/clk/sunxi/clk-a10-pll2.c
+++ b/drivers/clk/sunxi/clk-a10-pll2.c
@@ -41,15 +41,10 @@
#define SUN4I_PLL2_OUTPUTS 4
-struct sun4i_pll2_data {
- u32 post_div_offset;
- u32 pre_div_flags;
-};
-
static DEFINE_SPINLOCK(sun4i_a10_pll2_lock);
static void __init sun4i_pll2_setup(struct device_node *node,
- struct sun4i_pll2_data *data)
+ int post_div_offset)
{
const char *clk_name = node->name, *parent;
struct clk **clks, *base_clk, *prediv_clk;
@@ -76,7 +71,7 @@ static void __init sun4i_pll2_setup(struct device_node *node,
parent, 0, reg,
SUN4I_PLL2_PRE_DIV_SHIFT,
SUN4I_PLL2_PRE_DIV_WIDTH,
- data->pre_div_flags,
+ CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO,
&sun4i_a10_pll2_lock);
if (!prediv_clk) {
pr_err("Couldn't register the prediv clock\n");
@@ -127,7 +122,7 @@ static void __init sun4i_pll2_setup(struct device_node *node,
*/
val = readl(reg);
val &= ~(SUN4I_PLL2_POST_DIV_MASK << SUN4I_PLL2_POST_DIV_SHIFT);
- val |= (SUN4I_PLL2_POST_DIV_VALUE - data->post_div_offset) << SUN4I_PLL2_POST_DIV_SHIFT;
+ val |= (SUN4I_PLL2_POST_DIV_VALUE - post_div_offset) << SUN4I_PLL2_POST_DIV_SHIFT;
writel(val, reg);
of_property_read_string_index(node, "clock-output-names",
@@ -191,25 +186,17 @@ err_unmap:
iounmap(reg);
}
-static struct sun4i_pll2_data sun4i_a10_pll2_data = {
- .pre_div_flags = CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO,
-};
-
static void __init sun4i_a10_pll2_setup(struct device_node *node)
{
- sun4i_pll2_setup(node, &sun4i_a10_pll2_data);
+ sun4i_pll2_setup(node, 0);
}
CLK_OF_DECLARE(sun4i_a10_pll2, "allwinner,sun4i-a10-pll2-clk",
sun4i_a10_pll2_setup);
-static struct sun4i_pll2_data sun5i_a13_pll2_data = {
- .post_div_offset = 1,
-};
-
static void __init sun5i_a13_pll2_setup(struct device_node *node)
{
- sun4i_pll2_setup(node, &sun5i_a13_pll2_data);
+ sun4i_pll2_setup(node, 1);
}
CLK_OF_DECLARE(sun5i_a13_pll2, "allwinner,sun5i-a13-pll2-clk",
diff --git a/drivers/clk/ti/clk-816x.c b/drivers/clk/ti/clk-816x.c
index 1dfad0c712cd..2a5d84fdddc5 100644
--- a/drivers/clk/ti/clk-816x.c
+++ b/drivers/clk/ti/clk-816x.c
@@ -20,6 +20,8 @@ static struct ti_dt_clk dm816x_clks[] = {
DT_CLK(NULL, "sys_clkin", "sys_clkin_ck"),
DT_CLK(NULL, "timer_sys_ck", "sys_clkin_ck"),
DT_CLK(NULL, "sys_32k_ck", "sys_32k_ck"),
+ DT_CLK(NULL, "timer_32k_ck", "sysclk18_ck"),
+ DT_CLK(NULL, "timer_ext_ck", "tclkin_ck"),
DT_CLK(NULL, "mpu_ck", "mpu_ck"),
DT_CLK(NULL, "timer1_fck", "timer1_fck"),
DT_CLK(NULL, "timer2_fck", "timer2_fck"),
diff --git a/drivers/clk/ti/clkt_dpll.c b/drivers/clk/ti/clkt_dpll.c
index 9023ca9caf84..b5cc6f66ae5d 100644
--- a/drivers/clk/ti/clkt_dpll.c
+++ b/drivers/clk/ti/clkt_dpll.c
@@ -240,7 +240,7 @@ u8 omap2_init_dpll_parent(struct clk_hw *hw)
*/
unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk)
{
- long long dpll_clk;
+ u64 dpll_clk;
u32 dpll_mult, dpll_div, v;
struct dpll_data *dd;
@@ -262,7 +262,7 @@ unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk)
dpll_div = v & dd->div1_mask;
dpll_div >>= __ffs(dd->div1_mask);
- dpll_clk = (long long)clk_get_rate(dd->clk_ref) * dpll_mult;
+ dpll_clk = (u64)clk_get_rate(dd->clk_ref) * dpll_mult;
do_div(dpll_clk, dpll_div + 1);
return dpll_clk;
diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c
index 5b1726829e6d..df2558350fc1 100644
--- a/drivers/clk/ti/divider.c
+++ b/drivers/clk/ti/divider.c
@@ -214,7 +214,6 @@ static int ti_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
{
struct clk_divider *divider;
unsigned int div, value;
- unsigned long flags = 0;
u32 val;
if (!hw || !rate)
@@ -228,9 +227,6 @@ static int ti_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
if (value > div_mask(divider))
value = div_mask(divider);
- if (divider->lock)
- spin_lock_irqsave(divider->lock, flags);
-
if (divider->flags & CLK_DIVIDER_HIWORD_MASK) {
val = div_mask(divider) << (divider->shift + 16);
} else {
@@ -240,9 +236,6 @@ static int ti_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
val |= value << divider->shift;
ti_clk_ll_ops->clk_writel(val, divider->reg);
- if (divider->lock)
- spin_unlock_irqrestore(divider->lock, flags);
-
return 0;
}
@@ -256,8 +249,7 @@ static struct clk *_register_divider(struct device *dev, const char *name,
const char *parent_name,
unsigned long flags, void __iomem *reg,
u8 shift, u8 width, u8 clk_divider_flags,
- const struct clk_div_table *table,
- spinlock_t *lock)
+ const struct clk_div_table *table)
{
struct clk_divider *div;
struct clk *clk;
@@ -288,7 +280,6 @@ static struct clk *_register_divider(struct device *dev, const char *name,
div->shift = shift;
div->width = width;
div->flags = clk_divider_flags;
- div->lock = lock;
div->hw.init = &init;
div->table = table;
@@ -421,7 +412,7 @@ struct clk *ti_clk_register_divider(struct ti_clk *setup)
clk = _register_divider(NULL, setup->name, div->parent,
flags, (void __iomem *)reg, div->bit_shift,
- width, div_flags, table, NULL);
+ width, div_flags, table);
if (IS_ERR(clk))
kfree(table);
@@ -584,8 +575,7 @@ static void __init of_ti_divider_clk_setup(struct device_node *node)
goto cleanup;
clk = _register_divider(NULL, node->name, parent_name, flags, reg,
- shift, width, clk_divider_flags, table,
- NULL);
+ shift, width, clk_divider_flags, table);
if (!IS_ERR(clk)) {
of_clk_add_provider(node, of_clk_src_simple_get, clk);
diff --git a/drivers/clk/ti/fapll.c b/drivers/clk/ti/fapll.c
index f4b2e9888bdf..66a0d0ed8b55 100644
--- a/drivers/clk/ti/fapll.c
+++ b/drivers/clk/ti/fapll.c
@@ -168,7 +168,7 @@ static unsigned long ti_fapll_recalc_rate(struct clk_hw *hw,
{
struct fapll_data *fd = to_fapll(hw);
u32 fapll_n, fapll_p, v;
- long long rate;
+ u64 rate;
if (ti_fapll_clock_is_bypass(fd))
return parent_rate;
@@ -314,7 +314,7 @@ static unsigned long ti_fapll_synth_recalc_rate(struct clk_hw *hw,
{
struct fapll_synth *synth = to_synth(hw);
u32 synth_div_m;
- long long rate;
+ u64 rate;
/* The audio_pll_clk1 is hardwired to produce a 32.768 kHz clock */
if (!synth->div)
diff --git a/drivers/clk/ti/mux.c b/drivers/clk/ti/mux.c
index 69f08a1d047d..dab9ba88b9d6 100644
--- a/drivers/clk/ti/mux.c
+++ b/drivers/clk/ti/mux.c
@@ -69,7 +69,6 @@ static int ti_clk_mux_set_parent(struct clk_hw *hw, u8 index)
{
struct clk_mux *mux = to_clk_mux(hw);
u32 val;
- unsigned long flags = 0;
if (mux->table) {
index = mux->table[index];
@@ -81,9 +80,6 @@ static int ti_clk_mux_set_parent(struct clk_hw *hw, u8 index)
index++;
}
- if (mux->lock)
- spin_lock_irqsave(mux->lock, flags);
-
if (mux->flags & CLK_MUX_HIWORD_MASK) {
val = mux->mask << (mux->shift + 16);
} else {
@@ -93,9 +89,6 @@ static int ti_clk_mux_set_parent(struct clk_hw *hw, u8 index)
val |= index << mux->shift;
ti_clk_ll_ops->clk_writel(val, mux->reg);
- if (mux->lock)
- spin_unlock_irqrestore(mux->lock, flags);
-
return 0;
}
@@ -109,7 +102,7 @@ static struct clk *_register_mux(struct device *dev, const char *name,
const char **parent_names, u8 num_parents,
unsigned long flags, void __iomem *reg,
u8 shift, u32 mask, u8 clk_mux_flags,
- u32 *table, spinlock_t *lock)
+ u32 *table)
{
struct clk_mux *mux;
struct clk *clk;
@@ -133,7 +126,6 @@ static struct clk *_register_mux(struct device *dev, const char *name,
mux->shift = shift;
mux->mask = mask;
mux->flags = clk_mux_flags;
- mux->lock = lock;
mux->table = table;
mux->hw.init = &init;
@@ -175,7 +167,7 @@ struct clk *ti_clk_register_mux(struct ti_clk *setup)
return _register_mux(NULL, setup->name, mux->parents, mux->num_parents,
flags, (void __iomem *)reg, mux->bit_shift, mask,
- mux_flags, NULL, NULL);
+ mux_flags, NULL);
}
/**
@@ -227,8 +219,7 @@ static void of_mux_clk_setup(struct device_node *node)
mask = (1 << fls(mask)) - 1;
clk = _register_mux(NULL, node->name, parent_names, num_parents,
- flags, reg, shift, mask, clk_mux_flags, NULL,
- NULL);
+ flags, reg, shift, mask, clk_mux_flags, NULL);
if (!IS_ERR(clk))
of_clk_add_provider(node, of_clk_src_simple_get, clk);
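
[Note: across the divider and mux hunks above, every _register_divider() and _register_mux() call site visible in the diff passed a NULL lock, so the spin_lock_irqsave() paths being deleted were never taken; dropping the parameter removes dead plumbing without changing behavior.]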
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 71cfdf7c9708..b251013eef0a 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -1,4 +1,5 @@
menu "Clock Source drivers"
+ depends on !ARCH_USES_GETTIMEOFFSET
config CLKSRC_OF
bool
@@ -27,10 +28,16 @@ config CLKSRC_MMIO
bool
config DIGICOLOR_TIMER
- bool
+ bool "Digicolor timer driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
+ help
+ Enables support for the Digicolor timer driver.
config DW_APB_TIMER
- bool
+ bool "DW APB timer driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
+ help
+ Enables support for the DW APB timer.
config DW_APB_TIMER_OF
bool
@@ -38,47 +45,77 @@ config DW_APB_TIMER_OF
select CLKSRC_OF
config ROCKCHIP_TIMER
- bool
+ bool "Rockchip timer driver" if COMPILE_TEST
+ depends on ARM || ARM64
select CLKSRC_OF
+ help
+ Enables support for the Rockchip timer driver.
config ARMADA_370_XP_TIMER
- bool
+ bool "Armada 370 and XP timer driver" if COMPILE_TEST
+ depends on ARM
select CLKSRC_OF
+ help
+ Enables support for the Armada 370 and XP timer driver.
config MESON6_TIMER
- bool
+ bool "Meson6 timer driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
select CLKSRC_MMIO
+ help
+ Enables support for the Meson6 timer driver.
config ORION_TIMER
+ bool "Orion timer driver" if COMPILE_TEST
+ depends on ARM
select CLKSRC_OF
select CLKSRC_MMIO
- bool
+ help
+ Enables support for the Orion timer driver.
config SUN4I_TIMER
+ bool "Sun4i timer driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
select CLKSRC_MMIO
- bool
+ help
+ Enables support for the Sun4i timer.
config SUN5I_HSTIMER
+ bool "Sun5i timer driver" if COMPILE_TEST
select CLKSRC_MMIO
- bool
+ depends on COMMON_CLK
+ help
+ Enables support for the Sun5i timer.
config TEGRA_TIMER
- bool
+ bool "Tegra timer driver" if COMPILE_TEST
+ depends on ARM
+ help
+ Enables support for the Tegra timer.
config VT8500_TIMER
- bool
+ bool "VT8500 timer driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
+ help
+ Enables support for the VT8500 timer.
config CADENCE_TTC_TIMER
- bool
+ bool "Cadence TTC timer driver" if COMPILE_TEST
+ depends on COMMON_CLK
+ help
+ Enables support for the Cadence TTC timer.
config ASM9260_TIMER
- bool
+ bool "ASM9260 timer driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
select CLKSRC_MMIO
select CLKSRC_OF
+ help
+ Enables support for the ASM9260 timer.
config CLKSRC_NOMADIK_MTU
- bool
- depends on (ARCH_NOMADIK || ARCH_U8500)
+ bool "Nomakdik clocksource driver" if COMPILE_TEST
+ depends on ARM
select CLKSRC_MMIO
help
Support for Multi Timer Unit. MTU provides access
@@ -92,9 +129,8 @@ config CLKSRC_NOMADIK_MTU_SCHED_CLOCK
Use the Multi Timer Unit as the sched_clock.
config CLKSRC_DBX500_PRCMU
- bool "Clocksource PRCMU Timer"
- depends on UX500_SOC_DB8500
- default y
+ bool "Clocksource PRCMU Timer" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
help
Use the always on PRCMU Timer as clocksource
@@ -115,13 +151,18 @@ config CLKSRC_EFM32
event device.
config CLKSRC_LPC32XX
- bool
+ bool "Clocksource for LPC32XX" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
select CLKSRC_MMIO
select CLKSRC_OF
+ help
+ Support for the LPC32XX clocksource.
config CLKSRC_PISTACHIO
- bool
+ bool "Clocksource for Pistachio SoC" if COMPILE_TEST
select CLKSRC_OF
+ help
+ Enables the clocksource for the Pistachio SoC.
config CLKSRC_TI_32K
bool "Texas Instruments 32.768 Hz Clocksource" if COMPILE_TEST
@@ -198,13 +239,14 @@ config CLKSRC_METAG_GENERIC
This option enables support for the Meta per-thread timers.
config CLKSRC_EXYNOS_MCT
- def_bool y if ARCH_EXYNOS
- depends on !ARM64
+ bool "Exynos multi core timer driver" if COMPILE_TEST
+ depends on ARM
help
Support for Multi Core Timer controller on Exynos SoCs.
config CLKSRC_SAMSUNG_PWM
- bool
+ bool "PWM timer drvier for Samsung S3C, S5P" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
help
This is a new clocksource driver for the PWM timer found in
Samsung S3C, S5P and Exynos SoCs, replacing an earlier driver
@@ -212,7 +254,8 @@ config CLKSRC_SAMSUNG_PWM
needed only on systems that do not have the Exynos MCT available.
config FSL_FTM_TIMER
- bool
+ bool "Freescale FlexTimer Module driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
help
Support for Freescale FlexTimer Module (FTM) timer.
@@ -225,9 +268,12 @@ config SYS_SUPPORTS_SH_CMT
bool
config MTK_TIMER
+ bool "Mediatek timer driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
select CLKSRC_OF
select CLKSRC_MMIO
- bool
+ help
+ Support for the Mediatek timer driver.
config SYS_SUPPORTS_SH_MTU2
bool
@@ -278,7 +324,12 @@ config EM_TIMER_STI
such as EMEV2 from former NEC Electronics.
config CLKSRC_QCOM
- bool
+ bool "Qualcomm MSM timer" if COMPILE_TEST
+ depends on ARM
+ select CLKSRC_OF
+ help
+ This enables the clocksource and the per-CPU clockevent driver for the
+ Qualcomm SoCs.
config CLKSRC_VERSATILE
bool "ARM Versatile (Express) reference platforms clock source"
@@ -297,21 +348,40 @@ config CLKSRC_MIPS_GIC
select CLKSRC_OF
config CLKSRC_TANGO_XTAL
- bool
+ bool "Clocksource for Tango SoC" if COMPILE_TEST
+ depends on ARM
select CLKSRC_OF
+ select CLKSRC_MMIO
+ help
+ This enables the clocksource for the Tango SoC.
config CLKSRC_PXA
- def_bool y if ARCH_PXA || ARCH_SA1100
- select CLKSRC_OF if OF
+ bool "Clocksource for PXA or SA-11x0 platform" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
+ select CLKSRC_MMIO
help
This enables OST0 support available on PXA and SA-11x0
platforms.
+config H8300_TMR8
+ bool "Clockevent timer for the H8300 platform" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
+ help
+ This enables the 8-bit timer for the H8300 platform.
+
config H8300_TMR16
- bool
+ bool "Clockevent timer for the H83069 platform" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
+ help
+ This enables the 16-bit timer for the H8300 platform with the
+ H83069 CPU.
config H8300_TPU
- bool
+ bool "Clocksource for the H8300 platform" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
+ help
+ This enables the clocksource for the H8300 platform with the
+ H8S2678 CPU.
config CLKSRC_IMX_GPT
bool "Clocksource using i.MX GPT" if COMPILE_TEST
@@ -319,8 +389,7 @@ config CLKSRC_IMX_GPT
select CLKSRC_MMIO
config CLKSRC_ST_LPC
- bool
- depends on ARCH_STI
+ bool "Low power clocksource found in the LPC" if COMPILE_TEST
select CLKSRC_OF if OF
help
Enable this option to use the Low Power controller timer
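
[Note: the pattern repeated throughout this Kconfig hunk, bool "..." if COMPILE_TEST, keeps each symbol invisible during normal configuration (platforms continue to get the drivers via select) while exposing a prompt once COMPILE_TEST is enabled, so build-coverage configurations can turn these drivers on for any architecture that satisfies the remaining depends clauses.]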
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 56bd16e77ae3..dc2b8997f6e6 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -60,7 +60,7 @@ obj-$(CONFIG_CLKSRC_MIPS_GIC) += mips-gic-timer.o
obj-$(CONFIG_CLKSRC_TANGO_XTAL) += tango_xtal.o
obj-$(CONFIG_CLKSRC_IMX_GPT) += timer-imx-gpt.o
obj-$(CONFIG_ASM9260_TIMER) += asm9260_timer.o
-obj-$(CONFIG_H8300) += h8300_timer8.o
+obj-$(CONFIG_H8300_TMR8) += h8300_timer8.o
obj-$(CONFIG_H8300_TMR16) += h8300_timer16.o
obj-$(CONFIG_H8300_TPU) += h8300_tpu.o
obj-$(CONFIG_CLKSRC_ST_LPC) += clksrc_st_lpc.o
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index 6eab88985670..28037d0b8dcd 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -109,10 +109,8 @@ static void acpi_pm_check_blacklist(struct pci_dev *dev)
/* the bug has been fixed in PIIX4M */
if (dev->revision < 3) {
- printk(KERN_WARNING "* Found PM-Timer Bug on the chipset."
- " Due to workarounds for a bug,\n"
- "* this clock source is slow. Consider trying"
- " other clock sources\n");
+ pr_warn("* Found PM-Timer Bug on the chipset. Due to workarounds for a bug,\n"
+ "* this clock source is slow. Consider trying other clock sources\n");
acpi_pm_need_workaround();
}
@@ -125,12 +123,9 @@ static void acpi_pm_check_graylist(struct pci_dev *dev)
if (acpi_pm_good)
return;
- printk(KERN_WARNING "* The chipset may have PM-Timer Bug. Due to"
- " workarounds for a bug,\n"
- "* this clock source is slow. If you are sure your timer"
- " does not have\n"
- "* this bug, please use \"acpi_pm_good\" to disable the"
- " workaround\n");
+ pr_warn("* The chipset may have PM-Timer Bug. Due to workarounds for a bug,\n"
+ "* this clock source is slow. If you are sure your timer does not have\n"
+ "* this bug, please use \"acpi_pm_good\" to disable the workaround\n");
acpi_pm_need_workaround();
}
@@ -162,8 +157,7 @@ static int verify_pmtmr_rate(void)
/* Check that the PMTMR delta is within 5% of what we expect */
if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 ||
delta > (PMTMR_EXPECTED_RATE * 21) / 20) {
- printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% "
- "of normal - aborting.\n",
+ pr_info("PM-Timer running at invalid rate: %lu%% of normal - aborting.\n",
100UL * delta / PMTMR_EXPECTED_RATE);
return -1;
}
@@ -199,15 +193,14 @@ static int __init init_acpi_pm_clocksource(void)
break;
if ((value2 < value1) && ((value2) < 0xFFF))
break;
- printk(KERN_INFO "PM-Timer had inconsistent results:"
- " %#llx, %#llx - aborting.\n",
- value1, value2);
+ pr_info("PM-Timer had inconsistent results: %#llx, %#llx - aborting.\n",
+ value1, value2);
pmtmr_ioport = 0;
return -EINVAL;
}
if (i == ACPI_PM_READ_CHECKS) {
- printk(KERN_INFO "PM-Timer failed consistency check "
- " (%#llx) - aborting.\n", value1);
+ pr_info("PM-Timer failed consistency check (%#llx) - aborting.\n",
+ value1);
pmtmr_ioport = 0;
return -ENODEV;
}
diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c
index a2cb6fae9295..d189d8cb69f7 100644
--- a/drivers/clocksource/arm_global_timer.c
+++ b/drivers/clocksource/arm_global_timer.c
@@ -99,17 +99,17 @@ static void gt_compare_set(unsigned long delta, int periodic)
counter += delta;
ctrl = GT_CONTROL_TIMER_ENABLE;
- writel(ctrl, gt_base + GT_CONTROL);
- writel(lower_32_bits(counter), gt_base + GT_COMP0);
- writel(upper_32_bits(counter), gt_base + GT_COMP1);
+ writel_relaxed(ctrl, gt_base + GT_CONTROL);
+ writel_relaxed(lower_32_bits(counter), gt_base + GT_COMP0);
+ writel_relaxed(upper_32_bits(counter), gt_base + GT_COMP1);
if (periodic) {
- writel(delta, gt_base + GT_AUTO_INC);
+ writel_relaxed(delta, gt_base + GT_AUTO_INC);
ctrl |= GT_CONTROL_AUTO_INC;
}
ctrl |= GT_CONTROL_COMP_ENABLE | GT_CONTROL_IRQ_ENABLE;
- writel(ctrl, gt_base + GT_CONTROL);
+ writel_relaxed(ctrl, gt_base + GT_CONTROL);
}
static int gt_clockevent_shutdown(struct clock_event_device *evt)
@@ -195,12 +195,23 @@ static cycle_t gt_clocksource_read(struct clocksource *cs)
return gt_counter_read();
}
+static void gt_resume(struct clocksource *cs)
+{
+ unsigned long ctrl;
+
+ ctrl = readl(gt_base + GT_CONTROL);
+ if (!(ctrl & GT_CONTROL_TIMER_ENABLE))
+ /* re-enable timer on resume */
+ writel(GT_CONTROL_TIMER_ENABLE, gt_base + GT_CONTROL);
+}
+
static struct clocksource gt_clocksource = {
.name = "arm_global_timer",
.rating = 300,
.read = gt_clocksource_read,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .resume = gt_resume,
};
#ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
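
[Note: the new .resume hook runs when the clocksource core resumes after a system suspend; re-enabling the global timer there covers the case where the counter was stopped or powered down across suspend, instead of coming back with a dead clocksource.]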
diff --git a/drivers/clocksource/dw_apb_timer.c b/drivers/clocksource/dw_apb_timer.c
index c76c75006ea6..63345260244d 100644
--- a/drivers/clocksource/dw_apb_timer.c
+++ b/drivers/clocksource/dw_apb_timer.c
@@ -49,20 +49,31 @@ clocksource_to_dw_apb_clocksource(struct clocksource *cs)
return container_of(cs, struct dw_apb_clocksource, cs);
}
-static unsigned long apbt_readl(struct dw_apb_timer *timer, unsigned long offs)
+static inline u32 apbt_readl(struct dw_apb_timer *timer, unsigned long offs)
{
return readl(timer->base + offs);
}
-static void apbt_writel(struct dw_apb_timer *timer, unsigned long val,
- unsigned long offs)
+static inline void apbt_writel(struct dw_apb_timer *timer, u32 val,
+ unsigned long offs)
{
writel(val, timer->base + offs);
}
+static inline u32 apbt_readl_relaxed(struct dw_apb_timer *timer, unsigned long offs)
+{
+ return readl_relaxed(timer->base + offs);
+}
+
+static inline void apbt_writel_relaxed(struct dw_apb_timer *timer, u32 val,
+ unsigned long offs)
+{
+ writel_relaxed(val, timer->base + offs);
+}
+
static void apbt_disable_int(struct dw_apb_timer *timer)
{
- unsigned long ctrl = apbt_readl(timer, APBTMR_N_CONTROL);
+ u32 ctrl = apbt_readl(timer, APBTMR_N_CONTROL);
ctrl |= APBTMR_CONTROL_INT;
apbt_writel(timer, ctrl, APBTMR_N_CONTROL);
@@ -81,7 +92,7 @@ void dw_apb_clockevent_pause(struct dw_apb_clock_event_device *dw_ced)
static void apbt_eoi(struct dw_apb_timer *timer)
{
- apbt_readl(timer, APBTMR_N_EOI);
+ apbt_readl_relaxed(timer, APBTMR_N_EOI);
}
static irqreturn_t dw_apb_clockevent_irq(int irq, void *data)
@@ -103,7 +114,7 @@ static irqreturn_t dw_apb_clockevent_irq(int irq, void *data)
static void apbt_enable_int(struct dw_apb_timer *timer)
{
- unsigned long ctrl = apbt_readl(timer, APBTMR_N_CONTROL);
+ u32 ctrl = apbt_readl(timer, APBTMR_N_CONTROL);
/* clear pending intr */
apbt_readl(timer, APBTMR_N_EOI);
ctrl &= ~APBTMR_CONTROL_INT;
@@ -113,7 +124,7 @@ static void apbt_enable_int(struct dw_apb_timer *timer)
static int apbt_shutdown(struct clock_event_device *evt)
{
struct dw_apb_clock_event_device *dw_ced = ced_to_dw_apb_ced(evt);
- unsigned long ctrl;
+ u32 ctrl;
pr_debug("%s CPU %d state=shutdown\n", __func__,
cpumask_first(evt->cpumask));
@@ -127,7 +138,7 @@ static int apbt_shutdown(struct clock_event_device *evt)
static int apbt_set_oneshot(struct clock_event_device *evt)
{
struct dw_apb_clock_event_device *dw_ced = ced_to_dw_apb_ced(evt);
- unsigned long ctrl;
+ u32 ctrl;
pr_debug("%s CPU %d state=oneshot\n", __func__,
cpumask_first(evt->cpumask));
@@ -160,7 +171,7 @@ static int apbt_set_periodic(struct clock_event_device *evt)
{
struct dw_apb_clock_event_device *dw_ced = ced_to_dw_apb_ced(evt);
unsigned long period = DIV_ROUND_UP(dw_ced->timer.freq, HZ);
- unsigned long ctrl;
+ u32 ctrl;
pr_debug("%s CPU %d state=periodic\n", __func__,
cpumask_first(evt->cpumask));
@@ -196,17 +207,17 @@ static int apbt_resume(struct clock_event_device *evt)
static int apbt_next_event(unsigned long delta,
struct clock_event_device *evt)
{
- unsigned long ctrl;
+ u32 ctrl;
struct dw_apb_clock_event_device *dw_ced = ced_to_dw_apb_ced(evt);
/* Disable timer */
- ctrl = apbt_readl(&dw_ced->timer, APBTMR_N_CONTROL);
+ ctrl = apbt_readl_relaxed(&dw_ced->timer, APBTMR_N_CONTROL);
ctrl &= ~APBTMR_CONTROL_ENABLE;
- apbt_writel(&dw_ced->timer, ctrl, APBTMR_N_CONTROL);
+ apbt_writel_relaxed(&dw_ced->timer, ctrl, APBTMR_N_CONTROL);
/* write new count */
- apbt_writel(&dw_ced->timer, delta, APBTMR_N_LOAD_COUNT);
+ apbt_writel_relaxed(&dw_ced->timer, delta, APBTMR_N_LOAD_COUNT);
ctrl |= APBTMR_CONTROL_ENABLE;
- apbt_writel(&dw_ced->timer, ctrl, APBTMR_N_CONTROL);
+ apbt_writel_relaxed(&dw_ced->timer, ctrl, APBTMR_N_CONTROL);
return 0;
}
@@ -323,7 +334,7 @@ void dw_apb_clocksource_start(struct dw_apb_clocksource *dw_cs)
* start counting down from 0xffff_ffff. This is done by toggling the
* enable bit and then loading the initial load count of ~0.
*/
- unsigned long ctrl = apbt_readl(&dw_cs->timer, APBTMR_N_CONTROL);
+ u32 ctrl = apbt_readl(&dw_cs->timer, APBTMR_N_CONTROL);
ctrl &= ~APBTMR_CONTROL_ENABLE;
apbt_writel(&dw_cs->timer, ctrl, APBTMR_N_CONTROL);
@@ -338,11 +349,12 @@ void dw_apb_clocksource_start(struct dw_apb_clocksource *dw_cs)
static cycle_t __apbt_read_clocksource(struct clocksource *cs)
{
- unsigned long current_count;
+ u32 current_count;
struct dw_apb_clocksource *dw_cs =
clocksource_to_dw_apb_clocksource(cs);
- current_count = apbt_readl(&dw_cs->timer, APBTMR_N_CURRENT_VALUE);
+ current_count = apbt_readl_relaxed(&dw_cs->timer,
+ APBTMR_N_CURRENT_VALUE);
return (cycle_t)~current_count;
}
diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c
index a19a3f619cc7..860843cef572 100644
--- a/drivers/clocksource/dw_apb_timer_of.c
+++ b/drivers/clocksource/dw_apb_timer_of.c
@@ -16,6 +16,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/delay.h>
#include <linux/dw_apb_timer.h>
#include <linux/of.h>
#include <linux/of_address.h>
@@ -130,6 +131,17 @@ static void __init init_sched_clock(void)
sched_clock_register(read_sched_clock, 32, sched_rate);
}
+#ifdef CONFIG_ARM
+static unsigned long dw_apb_delay_timer_read(void)
+{
+ return ~readl_relaxed(sched_io_base);
+}
+
+static struct delay_timer dw_apb_delay_timer = {
+ .read_current_timer = dw_apb_delay_timer_read,
+};
+#endif
+
static int num_called;
static void __init dw_apb_timer_init(struct device_node *timer)
{
@@ -142,6 +154,10 @@ static void __init dw_apb_timer_init(struct device_node *timer)
pr_debug("%s: found clocksource timer\n", __func__);
add_clocksource(timer);
init_sched_clock();
+#ifdef CONFIG_ARM
+ dw_apb_delay_timer.freq = sched_rate;
+ register_current_timer_delay(&dw_apb_delay_timer);
+#endif
break;
default:
break;
diff --git a/drivers/clocksource/fsl_ftm_timer.c b/drivers/clocksource/fsl_ftm_timer.c
index 10202f1fdfd7..517e1c7624d4 100644
--- a/drivers/clocksource/fsl_ftm_timer.c
+++ b/drivers/clocksource/fsl_ftm_timer.c
@@ -203,7 +203,7 @@ static int __init ftm_clockevent_init(unsigned long freq, int irq)
int err;
ftm_writel(0x00, priv->clkevt_base + FTM_CNTIN);
- ftm_writel(~0UL, priv->clkevt_base + FTM_MOD);
+ ftm_writel(~0u, priv->clkevt_base + FTM_MOD);
ftm_reset_counter(priv->clkevt_base);
@@ -230,7 +230,7 @@ static int __init ftm_clocksource_init(unsigned long freq)
int err;
ftm_writel(0x00, priv->clksrc_base + FTM_CNTIN);
- ftm_writel(~0UL, priv->clksrc_base + FTM_MOD);
+ ftm_writel(~0u, priv->clksrc_base + FTM_MOD);
ftm_reset_counter(priv->clksrc_base);
diff --git a/drivers/clocksource/h8300_timer16.c b/drivers/clocksource/h8300_timer16.c
index 0e076c6fc006..75c44079b345 100644
--- a/drivers/clocksource/h8300_timer16.c
+++ b/drivers/clocksource/h8300_timer16.c
@@ -4,85 +4,56 @@
* Copyright 2015 Yoshinori Sato <ysato@users.sourcefoge.jp>
*/
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/init.h>
-#include <linux/platform_device.h>
#include <linux/clocksource.h>
-#include <linux/module.h>
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/of.h>
-
-#include <asm/segment.h>
-#include <asm/irq.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
#define TSTR 0
-#define TSNC 1
-#define TMDR 2
-#define TOLR 3
-#define TISRA 4
-#define TISRB 5
#define TISRC 6
#define TCR 0
-#define TIOR 1
#define TCNT 2
-#define GRA 4
-#define GRB 6
-
-#define FLAG_REPROGRAM (1 << 0)
-#define FLAG_SKIPEVENT (1 << 1)
-#define FLAG_IRQCONTEXT (1 << 2)
-#define FLAG_STARTED (1 << 3)
-#define ONESHOT 0
-#define PERIODIC 1
-
-#define RELATIVE 0
-#define ABSOLUTE 1
+#define bset(b, a) iowrite8(ioread8(a) | (1 << (b)), (a))
+#define bclr(b, a) iowrite8(ioread8(a) & ~(1 << (b)), (a))
struct timer16_priv {
- struct platform_device *pdev;
struct clocksource cs;
- struct irqaction irqaction;
unsigned long total_cycles;
- unsigned long mapbase;
- unsigned long mapcommon;
- unsigned long flags;
- unsigned short gra;
+ void __iomem *mapbase;
+ void __iomem *mapcommon;
unsigned short cs_enabled;
unsigned char enb;
- unsigned char imfa;
- unsigned char imiea;
unsigned char ovf;
- raw_spinlock_t lock;
- struct clk *clk;
+ unsigned char ovie;
};
static unsigned long timer16_get_counter(struct timer16_priv *p)
{
- unsigned long v1, v2, v3;
- int o1, o2;
+ unsigned short v1, v2, v3;
+ unsigned char o1, o2;
- o1 = ctrl_inb(p->mapcommon + TISRC) & p->ovf;
+ o1 = ioread8(p->mapcommon + TISRC) & p->ovf;
/* Make sure the timer value is stable. Stolen from acpi_pm.c */
do {
o2 = o1;
- v1 = ctrl_inw(p->mapbase + TCNT);
- v2 = ctrl_inw(p->mapbase + TCNT);
- v3 = ctrl_inw(p->mapbase + TCNT);
- o1 = ctrl_inb(p->mapcommon + TISRC) & p->ovf;
+ v1 = ioread16be(p->mapbase + TCNT);
+ v2 = ioread16be(p->mapbase + TCNT);
+ v3 = ioread16be(p->mapbase + TCNT);
+ o1 = ioread8(p->mapcommon + TISRC) & p->ovf;
} while (unlikely((o1 != o2) || (v1 > v2 && v1 < v3)
|| (v2 > v3 && v2 < v1) || (v3 > v1 && v3 < v2)));
- v2 |= 0x10000;
- return v2;
+ if (likely(!o1))
+ return v2;
+ else
+ return v2 + 0x10000;
}
@@ -90,8 +61,7 @@ static irqreturn_t timer16_interrupt(int irq, void *dev_id)
{
struct timer16_priv *p = (struct timer16_priv *)dev_id;
- ctrl_outb(ctrl_inb(p->mapcommon + TISRA) & ~p->imfa,
- p->mapcommon + TISRA);
+ bclr(p->ovf, p->mapcommon + TISRC);
p->total_cycles += 0x10000;
return IRQ_HANDLED;
@@ -105,13 +75,10 @@ static inline struct timer16_priv *cs_to_priv(struct clocksource *cs)
static cycle_t timer16_clocksource_read(struct clocksource *cs)
{
struct timer16_priv *p = cs_to_priv(cs);
- unsigned long flags, raw;
- unsigned long value;
+ unsigned long raw, value;
- raw_spin_lock_irqsave(&p->lock, flags);
value = p->total_cycles;
raw = timer16_get_counter(p);
- raw_spin_unlock_irqrestore(&p->lock, flags);
return value + raw;
}
@@ -123,10 +90,10 @@ static int timer16_enable(struct clocksource *cs)
WARN_ON(p->cs_enabled);
p->total_cycles = 0;
- ctrl_outw(0x0000, p->mapbase + TCNT);
- ctrl_outb(0x83, p->mapbase + TCR);
- ctrl_outb(ctrl_inb(p->mapcommon + TSTR) | p->enb,
- p->mapcommon + TSTR);
+ iowrite16be(0x0000, p->mapbase + TCNT);
+ iowrite8(0x83, p->mapbase + TCR);
+ bset(p->ovie, p->mapcommon + TISRC);
+ bset(p->enb, p->mapcommon + TSTR);
p->cs_enabled = true;
return 0;
@@ -138,116 +105,83 @@ static void timer16_disable(struct clocksource *cs)
WARN_ON(!p->cs_enabled);
- ctrl_outb(ctrl_inb(p->mapcommon + TSTR) & ~p->enb,
- p->mapcommon + TSTR);
+ bclr(p->ovie, p->mapcommon + TISRC);
+ bclr(p->enb, p->mapcommon + TSTR);
p->cs_enabled = false;
}
+static struct timer16_priv timer16_priv = {
+ .cs = {
+ .name = "h8300_16timer",
+ .rating = 200,
+ .read = timer16_clocksource_read,
+ .enable = timer16_enable,
+ .disable = timer16_disable,
+ .mask = CLOCKSOURCE_MASK(sizeof(unsigned long) * 8),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ },
+};
+
#define REG_CH 0
#define REG_COMM 1
-static int timer16_setup(struct timer16_priv *p, struct platform_device *pdev)
+static void __init h8300_16timer_init(struct device_node *node)
{
- struct resource *res[2];
+ void __iomem *base[2];
int ret, irq;
unsigned int ch;
+ struct clk *clk;
- p->pdev = pdev;
-
- res[REG_CH] = platform_get_resource(p->pdev,
- IORESOURCE_MEM, REG_CH);
- res[REG_COMM] = platform_get_resource(p->pdev,
- IORESOURCE_MEM, REG_COMM);
- if (!res[REG_CH] || !res[REG_COMM]) {
- dev_err(&p->pdev->dev, "failed to get I/O memory\n");
- return -ENXIO;
- }
- irq = platform_get_irq(p->pdev, 0);
- if (irq < 0) {
- dev_err(&p->pdev->dev, "failed to get irq\n");
- return irq;
+ clk = of_clk_get(node, 0);
+ if (IS_ERR(clk)) {
+ pr_err("failed to get clock for clocksource\n");
+ return;
}
- p->clk = clk_get(&p->pdev->dev, "fck");
- if (IS_ERR(p->clk)) {
- dev_err(&p->pdev->dev, "can't get clk\n");
- return PTR_ERR(p->clk);
+ base[REG_CH] = of_iomap(node, 0);
+ if (!base[REG_CH]) {
+ pr_err("failed to map registers for clocksource\n");
+ goto free_clk;
}
- of_property_read_u32(p->pdev->dev.of_node, "renesas,channel", &ch);
-
- p->pdev = pdev;
- p->mapbase = res[REG_CH]->start;
- p->mapcommon = res[REG_COMM]->start;
- p->enb = 1 << ch;
- p->imfa = 1 << ch;
- p->imiea = 1 << (4 + ch);
- p->cs.name = pdev->name;
- p->cs.rating = 200;
- p->cs.read = timer16_clocksource_read;
- p->cs.enable = timer16_enable;
- p->cs.disable = timer16_disable;
- p->cs.mask = CLOCKSOURCE_MASK(sizeof(unsigned long) * 8);
- p->cs.flags = CLOCK_SOURCE_IS_CONTINUOUS;
- ret = request_irq(irq, timer16_interrupt,
- IRQF_TIMER, pdev->name, p);
- if (ret < 0) {
- dev_err(&p->pdev->dev, "failed to request irq %d\n", irq);
- return ret;
+ base[REG_COMM] = of_iomap(node, 1);
+ if (!base[REG_COMM]) {
+ pr_err("failed to map registers for clocksource\n");
+ goto unmap_ch;
}
- clocksource_register_hz(&p->cs, clk_get_rate(p->clk) / 8);
-
- return 0;
-}
-
-static int timer16_probe(struct platform_device *pdev)
-{
- struct timer16_priv *p = platform_get_drvdata(pdev);
-
- if (p) {
- dev_info(&pdev->dev, "kept as earlytimer\n");
- return 0;
+ irq = irq_of_parse_and_map(node, 0);
+ if (!irq) {
+ pr_err("failed to get irq for clockevent\n");
+ goto unmap_comm;
}
- p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
+ of_property_read_u32(node, "renesas,channel", &ch);
- return timer16_setup(p, pdev);
-}
-
-static int timer16_remove(struct platform_device *pdev)
-{
- return -EBUSY;
-}
+ timer16_priv.mapbase = base[REG_CH];
+ timer16_priv.mapcommon = base[REG_COMM];
+ timer16_priv.enb = ch;
+ timer16_priv.ovf = ch;
+ timer16_priv.ovie = 4 + ch;
-static const struct of_device_id timer16_of_table[] = {
- { .compatible = "renesas,16bit-timer" },
- { }
-};
-static struct platform_driver timer16_driver = {
- .probe = timer16_probe,
- .remove = timer16_remove,
- .driver = {
- .name = "h8300h-16timer",
- .of_match_table = of_match_ptr(timer16_of_table),
+ ret = request_irq(irq, timer16_interrupt,
+ IRQF_TIMER, timer16_priv.cs.name, &timer16_priv);
+ if (ret < 0) {
+ pr_err("failed to request irq %d of clocksource\n", irq);
+ goto unmap_comm;
}
-};
-static int __init timer16_init(void)
-{
- return platform_driver_register(&timer16_driver);
-}
+ clocksource_register_hz(&timer16_priv.cs,
+ clk_get_rate(clk) / 8);
+ return;
-static void __exit timer16_exit(void)
-{
- platform_driver_unregister(&timer16_driver);
+unmap_comm:
+ iounmap(base[REG_COMM]);
+unmap_ch:
+ iounmap(base[REG_CH]);
+free_clk:
+ clk_put(clk);
}
-subsys_initcall(timer16_init);
-module_exit(timer16_exit);
-MODULE_AUTHOR("Yoshinori Sato");
-MODULE_DESCRIPTION("H8/300H 16bit Timer Driver");
-MODULE_LICENSE("GPL v2");
+CLOCKSOURCE_OF_DECLARE(h8300_16bit, "renesas,16bit-timer", h8300_16timer_init);
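
[Note: the three H8300 conversions in this series replace boilerplate platform drivers with the table-driven early-init hook used elsewhere in drivers/clocksource. A minimal sketch of the pattern, with illustrative names; the init function is matched by compatible string and runs early from the clocksource core, well before platform devices exist:

	static void __init my_timer_init(struct device_node *node)
	{
		void __iomem *base = of_iomap(node, 0);	/* map the registers */

		if (!base)
			return;
		/* ... get the clock, request the IRQ, register the clocksource ... */
	}
	CLOCKSOURCE_OF_DECLARE(my_timer, "vendor,my-timer", my_timer_init);
]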
diff --git a/drivers/clocksource/h8300_timer8.c b/drivers/clocksource/h8300_timer8.c
index 44375d8b9bc4..c151941e1956 100644
--- a/drivers/clocksource/h8300_timer8.c
+++ b/drivers/clocksource/h8300_timer8.c
@@ -8,19 +8,15 @@
*/
#include <linux/errno.h>
-#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
#include <linux/clockchips.h>
-#include <linux/module.h>
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/of.h>
-
-#include <asm/irq.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
#define _8TCR 0
#define _8TCSR 2
@@ -28,126 +24,74 @@
#define TCORB 6
#define _8TCNT 8
-#define FLAG_REPROGRAM (1 << 0)
-#define FLAG_SKIPEVENT (1 << 1)
-#define FLAG_IRQCONTEXT (1 << 2)
+#define CMIEA 6
+#define CMFA 6
+
#define FLAG_STARTED (1 << 3)
-#define ONESHOT 0
-#define PERIODIC 1
+#define SCALE 64
-#define RELATIVE 0
-#define ABSOLUTE 1
+#define bset(b, a) iowrite8(ioread8(a) | (1 << (b)), (a))
+#define bclr(b, a) iowrite8(ioread8(a) & ~(1 << (b)), (a))
struct timer8_priv {
- struct platform_device *pdev;
struct clock_event_device ced;
- struct irqaction irqaction;
- unsigned long mapbase;
- raw_spinlock_t lock;
+ void __iomem *mapbase;
unsigned long flags;
unsigned int rate;
- unsigned int tcora;
- struct clk *pclk;
};
-static unsigned long timer8_get_counter(struct timer8_priv *p)
-{
- unsigned long v1, v2, v3;
- int o1, o2;
-
- o1 = ctrl_inb(p->mapbase + _8TCSR) & 0x20;
-
- /* Make sure the timer value is stable. Stolen from acpi_pm.c */
- do {
- o2 = o1;
- v1 = ctrl_inw(p->mapbase + _8TCNT);
- v2 = ctrl_inw(p->mapbase + _8TCNT);
- v3 = ctrl_inw(p->mapbase + _8TCNT);
- o1 = ctrl_inb(p->mapbase + _8TCSR) & 0x20;
- } while (unlikely((o1 != o2) || (v1 > v2 && v1 < v3)
- || (v2 > v3 && v2 < v1) || (v3 > v1 && v3 < v2)));
-
- v2 |= o1 << 10;
- return v2;
-}
-
static irqreturn_t timer8_interrupt(int irq, void *dev_id)
{
struct timer8_priv *p = dev_id;
- ctrl_outb(ctrl_inb(p->mapbase + _8TCSR) & ~0x40,
- p->mapbase + _8TCSR);
- p->flags |= FLAG_IRQCONTEXT;
- ctrl_outw(p->tcora, p->mapbase + TCORA);
- if (!(p->flags & FLAG_SKIPEVENT)) {
- if (clockevent_state_oneshot(&p->ced))
- ctrl_outw(0x0000, p->mapbase + _8TCR);
- p->ced.event_handler(&p->ced);
- }
- p->flags &= ~(FLAG_SKIPEVENT | FLAG_IRQCONTEXT);
+ if (clockevent_state_oneshot(&p->ced))
+ iowrite16be(0x0000, p->mapbase + _8TCR);
+
+ p->ced.event_handler(&p->ced);
+
+ bclr(CMFA, p->mapbase + _8TCSR);
return IRQ_HANDLED;
}
static void timer8_set_next(struct timer8_priv *p, unsigned long delta)
{
- unsigned long flags;
- unsigned long now;
-
- raw_spin_lock_irqsave(&p->lock, flags);
if (delta >= 0x10000)
- dev_warn(&p->pdev->dev, "delta out of range\n");
- now = timer8_get_counter(p);
- p->tcora = delta;
- ctrl_outb(ctrl_inb(p->mapbase + _8TCR) | 0x40, p->mapbase + _8TCR);
- if (delta > now)
- ctrl_outw(delta, p->mapbase + TCORA);
- else
- ctrl_outw(now + 1, p->mapbase + TCORA);
-
- raw_spin_unlock_irqrestore(&p->lock, flags);
+ pr_warn("delta out of range\n");
+ bclr(CMIEA, p->mapbase + _8TCR);
+ iowrite16be(delta, p->mapbase + TCORA);
+ iowrite16be(0x0000, p->mapbase + _8TCNT);
+ bclr(CMFA, p->mapbase + _8TCSR);
+ bset(CMIEA, p->mapbase + _8TCR);
}
static int timer8_enable(struct timer8_priv *p)
{
- p->rate = clk_get_rate(p->pclk) / 64;
- ctrl_outw(0xffff, p->mapbase + TCORA);
- ctrl_outw(0x0000, p->mapbase + _8TCNT);
- ctrl_outw(0x0c02, p->mapbase + _8TCR);
+ iowrite16be(0xffff, p->mapbase + TCORA);
+ iowrite16be(0x0000, p->mapbase + _8TCNT);
+ iowrite16be(0x0c02, p->mapbase + _8TCR);
return 0;
}
static int timer8_start(struct timer8_priv *p)
{
- int ret = 0;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&p->lock, flags);
-
- if (!(p->flags & FLAG_STARTED))
- ret = timer8_enable(p);
+ int ret;
- if (ret)
- goto out;
- p->flags |= FLAG_STARTED;
+ if ((p->flags & FLAG_STARTED))
+ return 0;
- out:
- raw_spin_unlock_irqrestore(&p->lock, flags);
+ ret = timer8_enable(p);
+ if (!ret)
+ p->flags |= FLAG_STARTED;
return ret;
}
static void timer8_stop(struct timer8_priv *p)
{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&p->lock, flags);
-
- ctrl_outw(0x0000, p->mapbase + _8TCR);
-
- raw_spin_unlock_irqrestore(&p->lock, flags);
+ iowrite16be(0x0000, p->mapbase + _8TCR);
}
static inline struct timer8_priv *ced_to_priv(struct clock_event_device *ced)
@@ -155,7 +99,7 @@ static inline struct timer8_priv *ced_to_priv(struct clock_event_device *ced)
return container_of(ced, struct timer8_priv, ced);
}
-static void timer8_clock_event_start(struct timer8_priv *p, int periodic)
+static void timer8_clock_event_start(struct timer8_priv *p, unsigned long delta)
{
struct clock_event_device *ced = &p->ced;
@@ -166,7 +110,7 @@ static void timer8_clock_event_start(struct timer8_priv *p, int periodic)
ced->max_delta_ns = clockevent_delta2ns(0xffff, ced);
ced->min_delta_ns = clockevent_delta2ns(0x0001, ced);
- timer8_set_next(p, periodic?(p->rate + HZ/2) / HZ:0x10000);
+ timer8_set_next(p, delta);
}
static int timer8_clock_event_shutdown(struct clock_event_device *ced)
@@ -179,9 +123,9 @@ static int timer8_clock_event_periodic(struct clock_event_device *ced)
{
struct timer8_priv *p = ced_to_priv(ced);
- dev_info(&p->pdev->dev, "used for periodic clock events\n");
+ pr_info("%s: used for periodic clock events\n", ced->name);
timer8_stop(p);
- timer8_clock_event_start(p, PERIODIC);
+ timer8_clock_event_start(p, (p->rate + HZ/2) / HZ);
return 0;
}
@@ -190,9 +134,9 @@ static int timer8_clock_event_oneshot(struct clock_event_device *ced)
{
struct timer8_priv *p = ced_to_priv(ced);
- dev_info(&p->pdev->dev, "used for oneshot clock events\n");
+ pr_info("%s: used for oneshot clock events\n", ced->name);
timer8_stop(p);
- timer8_clock_event_start(p, ONESHOT);
+ timer8_clock_event_start(p, 0x10000);
return 0;
}
@@ -208,110 +152,64 @@ static int timer8_clock_event_next(unsigned long delta,
return 0;
}
-static int timer8_setup(struct timer8_priv *p,
- struct platform_device *pdev)
+static struct timer8_priv timer8_priv = {
+ .ced = {
+ .name = "h8300_8timer",
+ .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+ .rating = 200,
+ .set_next_event = timer8_clock_event_next,
+ .set_state_shutdown = timer8_clock_event_shutdown,
+ .set_state_periodic = timer8_clock_event_periodic,
+ .set_state_oneshot = timer8_clock_event_oneshot,
+ },
+};
+
+static void __init h8300_8timer_init(struct device_node *node)
{
- struct resource *res;
+ void __iomem *base;
int irq;
- int ret;
+ struct clk *clk;
- p->pdev = pdev;
-
- res = platform_get_resource(p->pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(&p->pdev->dev, "failed to get I/O memory\n");
- return -ENXIO;
+ clk = of_clk_get(node, 0);
+ if (IS_ERR(clk)) {
+ pr_err("failed to get clock for clockevent\n");
+ return;
}
- irq = platform_get_irq(p->pdev, 0);
- if (irq < 0) {
- dev_err(&p->pdev->dev, "failed to get irq\n");
- return -ENXIO;
+ base = of_iomap(node, 0);
+ if (!base) {
+ pr_err("failed to map registers for clockevent\n");
+ goto free_clk;
}
- p->mapbase = res->start;
-
- p->irqaction.name = dev_name(&p->pdev->dev);
- p->irqaction.handler = timer8_interrupt;
- p->irqaction.dev_id = p;
- p->irqaction.flags = IRQF_TIMER;
-
- p->pclk = clk_get(&p->pdev->dev, "fck");
- if (IS_ERR(p->pclk)) {
- dev_err(&p->pdev->dev, "can't get clk\n");
- return PTR_ERR(p->pclk);
+ irq = irq_of_parse_and_map(node, 0);
+ if (!irq) {
+ pr_err("failed to get irq for clockevent\n");
+ goto unmap_reg;
}
- p->ced.name = pdev->name;
- p->ced.features = CLOCK_EVT_FEAT_PERIODIC |
- CLOCK_EVT_FEAT_ONESHOT;
- p->ced.rating = 200;
- p->ced.cpumask = cpumask_of(0);
- p->ced.set_next_event = timer8_clock_event_next;
- p->ced.set_state_shutdown = timer8_clock_event_shutdown;
- p->ced.set_state_periodic = timer8_clock_event_periodic;
- p->ced.set_state_oneshot = timer8_clock_event_oneshot;
-
- ret = setup_irq(irq, &p->irqaction);
- if (ret < 0) {
- dev_err(&p->pdev->dev,
- "failed to request irq %d\n", irq);
- return ret;
- }
- clockevents_register_device(&p->ced);
- platform_set_drvdata(pdev, p);
+ timer8_priv.mapbase = base;
- return 0;
-}
-
-static int timer8_probe(struct platform_device *pdev)
-{
- struct timer8_priv *p = platform_get_drvdata(pdev);
-
- if (p) {
- dev_info(&pdev->dev, "kept as earlytimer\n");
- return 0;
+ timer8_priv.rate = clk_get_rate(clk) / SCALE;
+ if (!timer8_priv.rate) {
+ pr_err("Failed to get rate for the clocksource\n");
+ goto unmap_reg;
}
- p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
-
- return timer8_setup(p, pdev);
-}
-
-static int timer8_remove(struct platform_device *pdev)
-{
- return -EBUSY;
-}
-
-static const struct of_device_id timer8_of_table[] __maybe_unused = {
- { .compatible = "renesas,8bit-timer" },
- { }
-};
-
-MODULE_DEVICE_TABLE(of, timer8_of_table);
-static struct platform_driver timer8_driver = {
- .probe = timer8_probe,
- .remove = timer8_remove,
- .driver = {
- .name = "h8300-8timer",
- .of_match_table = of_match_ptr(timer8_of_table),
+ if (request_irq(irq, timer8_interrupt, IRQF_TIMER,
+ timer8_priv.ced.name, &timer8_priv) < 0) {
+ pr_err("failed to request irq %d for clockevent\n", irq);
+ goto unmap_reg;
}
-};
-static int __init timer8_init(void)
-{
- return platform_driver_register(&timer8_driver);
-}
+ clockevents_config_and_register(&timer8_priv.ced,
+ timer8_priv.rate, 1, 0x0000ffff);
-static void __exit timer8_exit(void)
-{
- platform_driver_unregister(&timer8_driver);
+ return;
+unmap_reg:
+ iounmap(base);
+free_clk:
+ clk_put(clk);
}
-subsys_initcall(timer8_init);
-module_exit(timer8_exit);
-MODULE_AUTHOR("Yoshinori Sato");
-MODULE_DESCRIPTION("H8/300 8bit Timer Driver");
-MODULE_LICENSE("GPL v2");
+CLOCKSOURCE_OF_DECLARE(h8300_8bit, "renesas,8bit-timer", h8300_8timer_init);
diff --git a/drivers/clocksource/h8300_tpu.c b/drivers/clocksource/h8300_tpu.c
index 5487410bfabb..d4c1a287c262 100644
--- a/drivers/clocksource/h8300_tpu.c
+++ b/drivers/clocksource/h8300_tpu.c
@@ -1,42 +1,30 @@
/*
- * H8/300 TPU Driver
+ * H8S TPU Driver
*
* Copyright 2015 Yoshinori Sato <ysato@users.sourcefoge.jp>
*
*/
#include <linux/errno.h>
-#include <linux/sched.h>
#include <linux/kernel.h>
-#include <linux/interrupt.h>
#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
#include <linux/clocksource.h>
-#include <linux/module.h>
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
-#include <asm/irq.h>
+#define TCR 0x0
+#define TSR 0x5
+#define TCNT 0x6
-#define TCR 0
-#define TMDR 1
-#define TIOR 2
-#define TER 4
-#define TSR 5
-#define TCNT 6
-#define TGRA 8
-#define TGRB 10
-#define TGRC 12
-#define TGRD 14
+#define TCFV 0x10
struct tpu_priv {
- struct platform_device *pdev;
struct clocksource cs;
- struct clk *clk;
- unsigned long mapbase1;
- unsigned long mapbase2;
+ void __iomem *mapbase1;
+ void __iomem *mapbase2;
raw_spinlock_t lock;
unsigned int cs_enabled;
};
@@ -45,8 +33,8 @@ static inline unsigned long read_tcnt32(struct tpu_priv *p)
{
unsigned long tcnt;
- tcnt = ctrl_inw(p->mapbase1 + TCNT) << 16;
- tcnt |= ctrl_inw(p->mapbase2 + TCNT);
+ tcnt = ioread16be(p->mapbase1 + TCNT) << 16;
+ tcnt |= ioread16be(p->mapbase2 + TCNT);
return tcnt;
}
@@ -55,7 +43,7 @@ static int tpu_get_counter(struct tpu_priv *p, unsigned long long *val)
unsigned long v1, v2, v3;
int o1, o2;
- o1 = ctrl_inb(p->mapbase1 + TSR) & 0x10;
+ o1 = ioread8(p->mapbase1 + TSR) & TCFV;
/* Make sure the timer value is stable. Stolen from acpi_pm.c */
do {
@@ -63,7 +51,7 @@ static int tpu_get_counter(struct tpu_priv *p, unsigned long long *val)
v1 = read_tcnt32(p);
v2 = read_tcnt32(p);
v3 = read_tcnt32(p);
- o1 = ctrl_inb(p->mapbase1 + TSR) & 0x10;
+ o1 = ioread8(p->mapbase1 + TSR) & TCFV;
} while (unlikely((o1 != o2) || (v1 > v2 && v1 < v3)
|| (v2 > v3 && v2 < v1) || (v3 > v1 && v3 < v2)));
@@ -96,10 +84,10 @@ static int tpu_clocksource_enable(struct clocksource *cs)
WARN_ON(p->cs_enabled);
- ctrl_outw(0, p->mapbase1 + TCNT);
- ctrl_outw(0, p->mapbase2 + TCNT);
- ctrl_outb(0x0f, p->mapbase1 + TCR);
- ctrl_outb(0x03, p->mapbase2 + TCR);
+ iowrite16be(0, p->mapbase1 + TCNT);
+ iowrite16be(0, p->mapbase2 + TCNT);
+ iowrite8(0x0f, p->mapbase1 + TCR);
+ iowrite8(0x03, p->mapbase2 + TCR);
p->cs_enabled = true;
return 0;
@@ -111,96 +99,59 @@ static void tpu_clocksource_disable(struct clocksource *cs)
WARN_ON(!p->cs_enabled);
- ctrl_outb(0, p->mapbase1 + TCR);
- ctrl_outb(0, p->mapbase2 + TCR);
+ iowrite8(0, p->mapbase1 + TCR);
+ iowrite8(0, p->mapbase2 + TCR);
p->cs_enabled = false;
}
+static struct tpu_priv tpu_priv = {
+ .cs = {
+ .name = "H8S_TPU",
+ .rating = 200,
+ .read = tpu_clocksource_read,
+ .enable = tpu_clocksource_enable,
+ .disable = tpu_clocksource_disable,
+ .mask = CLOCKSOURCE_MASK(sizeof(unsigned long) * 8),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ },
+};
+
#define CH_L 0
#define CH_H 1
-static int __init tpu_setup(struct tpu_priv *p, struct platform_device *pdev)
+static void __init h8300_tpu_init(struct device_node *node)
{
- struct resource *res[2];
-
- p->pdev = pdev;
+ void __iomem *base[2];
+ struct clk *clk;
- res[CH_L] = platform_get_resource(p->pdev, IORESOURCE_MEM, CH_L);
- res[CH_H] = platform_get_resource(p->pdev, IORESOURCE_MEM, CH_H);
- if (!res[CH_L] || !res[CH_H]) {
- dev_err(&p->pdev->dev, "failed to get I/O memory\n");
- return -ENXIO;
+ clk = of_clk_get(node, 0);
+ if (IS_ERR(clk)) {
+ pr_err("failed to get clock for clocksource\n");
+ return;
}
- p->clk = clk_get(&p->pdev->dev, "fck");
- if (IS_ERR(p->clk)) {
- dev_err(&p->pdev->dev, "can't get clk\n");
- return PTR_ERR(p->clk);
+ base[CH_L] = of_iomap(node, CH_L);
+ if (!base[CH_L]) {
+ pr_err("failed to map registers for clocksource\n");
+ goto free_clk;
}
-
- p->mapbase1 = res[CH_L]->start;
- p->mapbase2 = res[CH_H]->start;
-
- p->cs.name = pdev->name;
- p->cs.rating = 200;
- p->cs.read = tpu_clocksource_read;
- p->cs.enable = tpu_clocksource_enable;
- p->cs.disable = tpu_clocksource_disable;
- p->cs.mask = CLOCKSOURCE_MASK(sizeof(unsigned long) * 8);
- p->cs.flags = CLOCK_SOURCE_IS_CONTINUOUS;
- clocksource_register_hz(&p->cs, clk_get_rate(p->clk) / 64);
- platform_set_drvdata(pdev, p);
-
- return 0;
-}
-
-static int tpu_probe(struct platform_device *pdev)
-{
- struct tpu_priv *p = platform_get_drvdata(pdev);
-
- if (p) {
- dev_info(&pdev->dev, "kept as earlytimer\n");
- return 0;
+ base[CH_H] = of_iomap(node, CH_H);
+ if (!base[CH_H]) {
+ pr_err("failed to map registers for clocksource\n");
+ goto unmap_L;
}
- p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
+ tpu_priv.mapbase1 = base[CH_L];
+ tpu_priv.mapbase2 = base[CH_H];
- return tpu_setup(p, pdev);
-}
-
-static int tpu_remove(struct platform_device *pdev)
-{
- return -EBUSY;
-}
-
-static const struct of_device_id tpu_of_table[] = {
- { .compatible = "renesas,tpu" },
- { }
-};
+ clocksource_register_hz(&tpu_priv.cs, clk_get_rate(clk) / 64);
-static struct platform_driver tpu_driver = {
- .probe = tpu_probe,
- .remove = tpu_remove,
- .driver = {
- .name = "h8s-tpu",
- .of_match_table = of_match_ptr(tpu_of_table),
- }
-};
-
-static int __init tpu_init(void)
-{
- return platform_driver_register(&tpu_driver);
-}
+ return;
-static void __exit tpu_exit(void)
-{
- platform_driver_unregister(&tpu_driver);
+unmap_L:
+ iounmap(base[CH_H]);
+free_clk:
+ clk_put(clk);
}
-subsys_initcall(tpu_init);
-module_exit(tpu_exit);
-MODULE_AUTHOR("Yoshinori Sato");
-MODULE_DESCRIPTION("H8S Timer Pulse Unit Driver");
-MODULE_LICENSE("GPL v2");
+CLOCKSOURCE_OF_DECLARE(h8300_tpu, "renesas,tpu", h8300_tpu_init);
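For reference, the CLOCKSOURCE_OF_DECLARE pattern the TPU driver adopts here: the init callback is invoked early for each matching device-tree node, before platform devices exist, so it must map its resources and register the clocksource itself. A minimal sketch, with a made-up compatible string:

static void __init example_timer_init(struct device_node *node)
{
	void __iomem *base = of_iomap(node, 0);	/* map reg entry 0 */

	if (!base) {
		pr_err("failed to map timer registers\n");
		return;
	}
	/* program the hardware, then register the clocksource */
}
CLOCKSOURCE_OF_DECLARE(example_timer, "vendor,example-timer",
		       example_timer_init);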
diff --git a/drivers/clocksource/mmio.c b/drivers/clocksource/mmio.c
index 1593ade2a815..c4f7d7a9b689 100644
--- a/drivers/clocksource/mmio.c
+++ b/drivers/clocksource/mmio.c
@@ -55,7 +55,7 @@ int __init clocksource_mmio_init(void __iomem *base, const char *name,
{
struct clocksource_mmio *cs;
- if (bits > 32 || bits < 16)
+ if (bits > 64 || bits < 16)
return -EINVAL;
cs = kzalloc(sizeof(struct clocksource_mmio), GFP_KERNEL);
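With the width check relaxed to 64 bits, this generic helper can back the conversions further down (tango_xtal, sun5i). A hedged usage sketch, assuming base, rate and ret are already set up and base points at a free-running up-counter:

	ret = clocksource_mmio_init(base, "example-counter", rate, 300, 32,
				    clocksource_mmio_readl_up);
	if (ret)	/* 0 on success, negative errno on failure */
		pr_err("clocksource registration failed\n");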
diff --git a/drivers/clocksource/mtk_timer.c b/drivers/clocksource/mtk_timer.c
index fbfc74685e6a..d67bc356488f 100644
--- a/drivers/clocksource/mtk_timer.c
+++ b/drivers/clocksource/mtk_timer.c
@@ -16,6 +16,8 @@
* GNU General Public License for more details.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/clk.h>
#include <linux/clockchips.h>
#include <linux/interrupt.h>
@@ -187,10 +189,8 @@ static void __init mtk_timer_init(struct device_node *node)
struct clk *clk;
evt = kzalloc(sizeof(*evt), GFP_KERNEL);
- if (!evt) {
- pr_warn("Can't allocate mtk clock event driver struct");
+ if (!evt)
return;
- }
evt->dev.name = "mtk_tick";
evt->dev.rating = 300;
@@ -204,31 +204,31 @@ static void __init mtk_timer_init(struct device_node *node)
evt->gpt_base = of_io_request_and_map(node, 0, "mtk-timer");
if (IS_ERR(evt->gpt_base)) {
- pr_warn("Can't get resource\n");
- return;
+ pr_err("Can't get resource\n");
+ goto err_kzalloc;
}
evt->dev.irq = irq_of_parse_and_map(node, 0);
if (evt->dev.irq <= 0) {
- pr_warn("Can't parse IRQ");
+ pr_err("Can't parse IRQ\n");
goto err_mem;
}
clk = of_clk_get(node, 0);
if (IS_ERR(clk)) {
- pr_warn("Can't get timer clock");
+ pr_err("Can't get timer clock\n");
goto err_irq;
}
if (clk_prepare_enable(clk)) {
- pr_warn("Can't prepare clock");
+ pr_err("Can't prepare clock\n");
goto err_clk_put;
}
rate = clk_get_rate(clk);
if (request_irq(evt->dev.irq, mtk_timer_interrupt,
IRQF_TIMER | IRQF_IRQPOLL, "mtk_timer", evt)) {
- pr_warn("failed to setup irq %d\n", evt->dev.irq);
+ pr_err("failed to setup irq %d\n", evt->dev.irq);
goto err_clk_disable;
}
@@ -260,5 +260,7 @@ err_mem:
iounmap(evt->gpt_base);
of_address_to_resource(node, 0, &res);
release_mem_region(res.start, resource_size(&res));
+err_kzalloc:
+ kfree(evt);
}
CLOCKSOURCE_OF_DECLARE(mtk_mt6577, "mediatek,mt6577-timer", mtk_timer_init);
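The reworked error path above follows the usual reverse-order unwind: each label releases exactly what was acquired before the failing step, ending at the new err_kzalloc label that frees the event structure. A sketch of the shape, with a hypothetical struct and acquire/release helpers:

static void __init example_init(void)
{
	struct evt *evt = kzalloc(sizeof(*evt), GFP_KERNEL);

	if (!evt)
		return;
	if (acquire_a())		/* hypothetical step 1 */
		goto err_free;
	if (acquire_b())		/* hypothetical step 2 */
		goto err_release_a;
	return;				/* success: everything held */

err_release_a:
	release_a();			/* undo step 1 */
err_free:
	kfree(evt);			/* undo the allocation */
}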
diff --git a/drivers/clocksource/rockchip_timer.c b/drivers/clocksource/rockchip_timer.c
index d3c1742ded1a..8c77a529d0d4 100644
--- a/drivers/clocksource/rockchip_timer.c
+++ b/drivers/clocksource/rockchip_timer.c
@@ -17,16 +17,16 @@
#define TIMER_NAME "rk_timer"
-#define TIMER_LOAD_COUNT0 0x00
-#define TIMER_LOAD_COUNT1 0x04
-#define TIMER_CONTROL_REG 0x10
-#define TIMER_INT_STATUS 0x18
+#define TIMER_LOAD_COUNT0 0x00
+#define TIMER_LOAD_COUNT1 0x04
+#define TIMER_CONTROL_REG 0x10
+#define TIMER_INT_STATUS 0x18
-#define TIMER_DISABLE 0x0
-#define TIMER_ENABLE 0x1
-#define TIMER_MODE_FREE_RUNNING (0 << 1)
-#define TIMER_MODE_USER_DEFINED_COUNT (1 << 1)
-#define TIMER_INT_UNMASK (1 << 2)
+#define TIMER_DISABLE 0x0
+#define TIMER_ENABLE 0x1
+#define TIMER_MODE_FREE_RUNNING (0 << 1)
+#define TIMER_MODE_USER_DEFINED_COUNT (1 << 1)
+#define TIMER_INT_UNMASK (1 << 2)
struct bc_timer {
struct clock_event_device ce;
@@ -49,14 +49,12 @@ static inline void __iomem *rk_base(struct clock_event_device *ce)
static inline void rk_timer_disable(struct clock_event_device *ce)
{
writel_relaxed(TIMER_DISABLE, rk_base(ce) + TIMER_CONTROL_REG);
- dsb();
}
static inline void rk_timer_enable(struct clock_event_device *ce, u32 flags)
{
writel_relaxed(TIMER_ENABLE | TIMER_INT_UNMASK | flags,
rk_base(ce) + TIMER_CONTROL_REG);
- dsb();
}
static void rk_timer_update_counter(unsigned long cycles,
@@ -64,13 +62,11 @@ static void rk_timer_update_counter(unsigned long cycles,
{
writel_relaxed(cycles, rk_base(ce) + TIMER_LOAD_COUNT0);
writel_relaxed(0, rk_base(ce) + TIMER_LOAD_COUNT1);
- dsb();
}
static void rk_timer_interrupt_clear(struct clock_event_device *ce)
{
writel_relaxed(1, rk_base(ce) + TIMER_INT_STATUS);
- dsb();
}
static inline int rk_timer_set_next_event(unsigned long cycles,
@@ -173,4 +169,5 @@ static void __init rk_timer_init(struct device_node *np)
clockevents_config_and_register(ce, bc_timer.freq, 1, UINT_MAX);
}
+
CLOCKSOURCE_OF_DECLARE(rk_timer, "rockchip,rk3288-timer", rk_timer_init);
diff --git a/drivers/clocksource/tango_xtal.c b/drivers/clocksource/tango_xtal.c
index d297b30d2bc0..2bcecafdeaea 100644
--- a/drivers/clocksource/tango_xtal.c
+++ b/drivers/clocksource/tango_xtal.c
@@ -19,19 +19,6 @@ static u64 notrace read_sched_clock(void)
return read_xtal_counter();
}
-static cycle_t read_clocksource(struct clocksource *cs)
-{
- return read_xtal_counter();
-}
-
-static struct clocksource tango_xtal = {
- .name = "tango-xtal",
- .rating = 350,
- .read = read_clocksource,
- .mask = CLOCKSOURCE_MASK(32),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
static void __init tango_clocksource_init(struct device_node *np)
{
struct clk *clk;
@@ -53,8 +40,9 @@ static void __init tango_clocksource_init(struct device_node *np)
delay_timer.freq = xtal_freq;
delay_timer.read_current_timer = read_xtal_counter;
- ret = clocksource_register_hz(&tango_xtal, xtal_freq);
- if (ret != 0) {
+ ret = clocksource_mmio_init(xtal_in_cnt, "tango-xtal", xtal_freq, 350,
+ 32, clocksource_mmio_readl_up);
+	if (ret) {
pr_err("%s: registration failed\n", np->full_name);
return;
}
diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c
index 6ebda1177e79..38333aba3055 100644
--- a/drivers/clocksource/tegra20_timer.c
+++ b/drivers/clocksource/tegra20_timer.c
@@ -96,7 +96,8 @@ static struct clock_event_device tegra_clockevent = {
.name = "timer0",
.rating = 300,
.features = CLOCK_EVT_FEAT_ONESHOT |
- CLOCK_EVT_FEAT_PERIODIC,
+ CLOCK_EVT_FEAT_PERIODIC |
+ CLOCK_EVT_FEAT_DYNIRQ,
.set_next_event = tegra_timer_set_next_event,
.set_state_shutdown = tegra_timer_shutdown,
.set_state_periodic = tegra_timer_set_periodic,
diff --git a/drivers/clocksource/time-lpc32xx.c b/drivers/clocksource/time-lpc32xx.c
index a1c06a2bc77c..1316876b487a 100644
--- a/drivers/clocksource/time-lpc32xx.c
+++ b/drivers/clocksource/time-lpc32xx.c
@@ -125,7 +125,7 @@ static int __init lpc32xx_clocksource_init(struct device_node *np)
clk = of_clk_get_by_name(np, "timerclk");
if (IS_ERR(clk)) {
- pr_err("clock get failed (%lu)\n", PTR_ERR(clk));
+ pr_err("clock get failed (%ld)\n", PTR_ERR(clk));
return PTR_ERR(clk);
}
@@ -184,7 +184,7 @@ static int __init lpc32xx_clockevent_init(struct device_node *np)
clk = of_clk_get_by_name(np, "timerclk");
if (IS_ERR(clk)) {
- pr_err("clock get failed (%lu)\n", PTR_ERR(clk));
+ pr_err("clock get failed (%ld)\n", PTR_ERR(clk));
return PTR_ERR(clk);
}
diff --git a/drivers/clocksource/time-pistachio.c b/drivers/clocksource/time-pistachio.c
index bba679900054..3269d9ef7a18 100644
--- a/drivers/clocksource/time-pistachio.c
+++ b/drivers/clocksource/time-pistachio.c
@@ -84,7 +84,7 @@ pistachio_clocksource_read_cycles(struct clocksource *cs)
counter = gpt_readl(pcs->base, TIMER_CURRENT_VALUE, 0);
raw_spin_unlock_irqrestore(&pcs->lock, flags);
- return ~(cycle_t)counter;
+ return (cycle_t)~counter;
}
static u64 notrace pistachio_read_sched_clock(void)
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index bca9573e036a..24c83f9efd87 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -152,13 +152,6 @@ static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static cycle_t sun5i_clksrc_read(struct clocksource *clksrc)
-{
- struct sun5i_timer_clksrc *cs = to_sun5i_timer_clksrc(clksrc);
-
- return ~readl(cs->timer.base + TIMER_CNTVAL_LO_REG(1));
-}
-
static int sun5i_rate_cb_clksrc(struct notifier_block *nb,
unsigned long event, void *data)
{
@@ -217,13 +210,8 @@ static int __init sun5i_setup_clocksource(struct device_node *node,
writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
base + TIMER_CTL_REG(1));
- cs->clksrc.name = node->name;
- cs->clksrc.rating = 340;
- cs->clksrc.read = sun5i_clksrc_read;
- cs->clksrc.mask = CLOCKSOURCE_MASK(32);
- cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
-
- ret = clocksource_register_hz(&cs->clksrc, rate);
+ ret = clocksource_mmio_init(base + TIMER_CNTVAL_LO_REG(1), node->name,
+ rate, 340, 32, clocksource_mmio_readl_down);
if (ret) {
pr_err("Couldn't register clock source.\n");
goto err_remove_notifier;
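clocksource_mmio_readl_down substitutes for the removed sun5i_clksrc_read(): the hardware counts down, so the read hook inverts the register to present a monotonically increasing value. Its approximate shape in mmio.c (a sketch, not quoted verbatim):

cycle_t clocksource_mmio_readl_down(struct clocksource *c)
{
	/* invert the down-counter so the core sees an up-counter */
	return ~(cycle_t)readl_relaxed(to_mmio_clksrc(c)->reg) & c->mask;
}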
diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c
index a92e94b40b5b..de49805fbb09 100644
--- a/drivers/clocksource/vt8500_timer.c
+++ b/drivers/clocksource/vt8500_timer.c
@@ -30,7 +30,6 @@
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/delay.h>
-#include <asm/mach/time.h>
#include <linux/of.h>
#include <linux/of_address.h>
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index d7373ca69c99..25693b045371 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -179,26 +179,21 @@ static int cn_call_callback(struct sk_buff *skb)
*
* It checks skb, netlink header and msg sizes, and calls callback helper.
*/
-static void cn_rx_skb(struct sk_buff *__skb)
+static void cn_rx_skb(struct sk_buff *skb)
{
struct nlmsghdr *nlh;
- struct sk_buff *skb;
int len, err;
- skb = skb_get(__skb);
-
if (skb->len >= NLMSG_HDRLEN) {
nlh = nlmsg_hdr(skb);
len = nlmsg_len(nlh);
if (len < (int)sizeof(struct cn_msg) ||
skb->len < nlh->nlmsg_len ||
- len > CONNECTOR_MAX_MSG_SIZE) {
- kfree_skb(skb);
+ len > CONNECTOR_MAX_MSG_SIZE)
return;
- }
- err = cn_call_callback(skb);
+ err = cn_call_callback(skb_get(skb));
if (err < 0)
kfree_skb(skb);
}
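The rework above tightens the sk_buff reference counting: a reference is taken only when the message is actually handed onward, and dropped only if that hand-off fails. The general pattern, with a hypothetical deliver() consumer:

	struct sk_buff *held = skb_get(skb);	/* ++users, same buffer */

	if (deliver(held) < 0)			/* hypothetical consumer */
		kfree_skb(held);		/* not consumed: drop our ref */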
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 1582c1c016b0..b1f8a73e5a94 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -84,6 +84,7 @@ config ARM_KIRKWOOD_CPUFREQ
config ARM_MT8173_CPUFREQ
bool "Mediatek MT8173 CPUFreq support"
depends on ARCH_MEDIATEK && REGULATOR
+ depends on ARM64 || (ARM_CPU_TOPOLOGY && COMPILE_TEST)
depends on !CPU_THERMAL || THERMAL=y
select PM_OPP
help
@@ -201,7 +202,7 @@ config ARM_SA1110_CPUFREQ
config ARM_SCPI_CPUFREQ
tristate "SCPI based CPUfreq driver"
- depends on ARM_BIG_LITTLE_CPUFREQ && ARM_SCPI_PROTOCOL
+ depends on ARM_BIG_LITTLE_CPUFREQ && ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI
help
This adds the CPUfreq driver support for ARM big.LITTLE platforms
using SCPI protocol for CPU power management.
@@ -225,7 +226,7 @@ config ARM_TEGRA20_CPUFREQ
config ARM_TEGRA124_CPUFREQ
tristate "Tegra124 CPUFreq support"
- depends on ARCH_TEGRA && CPUFREQ_DT
+ depends on ARCH_TEGRA && CPUFREQ_DT && REGULATOR
default y
help
This adds the CPUFreq driver support for Tegra124 SOCs.
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index adbd1de1cea5..c59bdcb83217 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -5,7 +5,6 @@
config X86_INTEL_PSTATE
bool "Intel P state control"
depends on X86
- select ACPI_PROCESSOR if ACPI
help
This driver provides a P state for Intel core processors.
The driver implements an internal governor and will become
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index e8cb334094b0..7c0bdfb1a2ca 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -98,10 +98,11 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
policy->max = cpu->perf_caps.highest_perf;
policy->cpuinfo.min_freq = policy->min;
policy->cpuinfo.max_freq = policy->max;
+ policy->shared_type = cpu->shared_type;
if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
cpumask_copy(policy->cpus, cpu->shared_cpu_map);
- else {
+ else if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL) {
/* Support only SW_ANY for now. */
pr_debug("Unsupported CPU co-ord type\n");
return -EFAULT;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 7c48e7316d91..8412ce5f93a7 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -976,10 +976,14 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
new_policy.governor = gov;
- /* Use the default policy if its valid. */
- if (cpufreq_driver->setpolicy)
- cpufreq_parse_governor(gov->name, &new_policy.policy, NULL);
-
+ /* Use the default policy if there is no last_policy. */
+ if (cpufreq_driver->setpolicy) {
+ if (policy->last_policy)
+ new_policy.policy = policy->last_policy;
+ else
+ cpufreq_parse_governor(gov->name, &new_policy.policy,
+ NULL);
+ }
/* set default policy */
return cpufreq_set_policy(policy, &new_policy);
}
@@ -1330,6 +1334,8 @@ static void cpufreq_offline_prepare(unsigned int cpu)
if (has_target())
strncpy(policy->last_governor, policy->governor->name,
CPUFREQ_NAME_LEN);
+ else
+ policy->last_policy = policy->policy;
} else if (cpu == policy->cpu) {
/* Nominate new CPU */
policy->cpu = cpumask_any(policy->cpus);
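For setpolicy drivers there is no governor name to preserve across an offline/online cycle, so the numeric policy is stashed instead; this hunk saves it and the init change earlier restores it. In short (last_policy == 0 means it was never saved):

	/* offline: remember the numeric policy (performance/powersave) */
	policy->last_policy = policy->policy;

	/* next init: prefer it over the compiled-in default governor */
	if (policy->last_policy)
		new_policy.policy = policy->last_policy;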
@@ -1401,13 +1407,10 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
}
cpumask_clear_cpu(cpu, policy->real_cpus);
+ remove_cpu_dev_symlink(policy, cpu);
- if (cpumask_empty(policy->real_cpus)) {
+ if (cpumask_empty(policy->real_cpus))
cpufreq_policy_free(policy, true);
- return;
- }
-
- remove_cpu_dev_symlink(policy, cpu);
}
static void handle_update(struct work_struct *work)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 2e31d097def6..98fb8821382d 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -34,14 +34,10 @@
#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>
-#if IS_ENABLED(CONFIG_ACPI)
-#include <acpi/processor.h>
-#endif
-
-#define BYT_RATIOS 0x66a
-#define BYT_VIDS 0x66b
-#define BYT_TURBO_RATIOS 0x66c
-#define BYT_TURBO_VIDS 0x66d
+#define ATOM_RATIOS 0x66a
+#define ATOM_VIDS 0x66b
+#define ATOM_TURBO_RATIOS 0x66c
+#define ATOM_TURBO_VIDS 0x66d
#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
@@ -117,9 +113,6 @@ struct cpudata {
u64 prev_mperf;
u64 prev_tsc;
struct sample sample;
-#if IS_ENABLED(CONFIG_ACPI)
- struct acpi_processor_performance acpi_perf_data;
-#endif
};
static struct cpudata **all_cpu_data;
@@ -150,7 +143,6 @@ struct cpu_defaults {
static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;
static int hwp_active;
-static int no_acpi_perf;
struct perf_limits {
int no_turbo;
@@ -163,8 +155,6 @@ struct perf_limits {
int max_sysfs_pct;
int min_policy_pct;
int min_sysfs_pct;
- int max_perf_ctl;
- int min_perf_ctl;
};
static struct perf_limits performance_limits = {
@@ -191,8 +181,6 @@ static struct perf_limits powersave_limits = {
.max_sysfs_pct = 100,
.min_policy_pct = 0,
.min_sysfs_pct = 0,
- .max_perf_ctl = 0,
- .min_perf_ctl = 0,
};
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
@@ -201,153 +189,6 @@ static struct perf_limits *limits = &performance_limits;
static struct perf_limits *limits = &powersave_limits;
#endif
-#if IS_ENABLED(CONFIG_ACPI)
-/*
- * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and
- * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and
- * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state
- * ratio, out of it only high 8 bits are used. For example 0x1700 is setting
- * target ratio 0x17. The _PSS control value stores in a format which can be
- * directly written to PERF_CTL MSR. But in intel_pstate driver this shift
- * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()).
- * This function converts the _PSS control value to intel pstate driver format
- * for comparison and assignment.
- */
-static int convert_to_native_pstate_format(struct cpudata *cpu, int index)
-{
- return cpu->acpi_perf_data.states[index].control >> 8;
-}
-
-static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy)
-{
- struct cpudata *cpu;
- int ret;
- bool turbo_absent = false;
- int max_pstate_index;
- int min_pss_ctl, max_pss_ctl, turbo_pss_ctl;
- int i;
-
- cpu = all_cpu_data[policy->cpu];
-
- pr_debug("intel_pstate: default limits 0x%x 0x%x 0x%x\n",
- cpu->pstate.min_pstate, cpu->pstate.max_pstate,
- cpu->pstate.turbo_pstate);
-
- if (!cpu->acpi_perf_data.shared_cpu_map &&
- zalloc_cpumask_var_node(&cpu->acpi_perf_data.shared_cpu_map,
- GFP_KERNEL, cpu_to_node(policy->cpu))) {
- return -ENOMEM;
- }
-
- ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
- policy->cpu);
- if (ret)
- return ret;
-
- /*
- * Check if the control value in _PSS is for PERF_CTL MSR, which should
- * guarantee that the states returned by it map to the states in our
- * list directly.
- */
- if (cpu->acpi_perf_data.control_register.space_id !=
- ACPI_ADR_SPACE_FIXED_HARDWARE)
- return -EIO;
-
- pr_debug("intel_pstate: CPU%u - ACPI _PSS perf data\n", policy->cpu);
- for (i = 0; i < cpu->acpi_perf_data.state_count; i++)
- pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n",
- (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
- (u32) cpu->acpi_perf_data.states[i].core_frequency,
- (u32) cpu->acpi_perf_data.states[i].power,
- (u32) cpu->acpi_perf_data.states[i].control);
-
- /*
- * If there is only one entry _PSS, simply ignore _PSS and continue as
- * usual without taking _PSS into account
- */
- if (cpu->acpi_perf_data.state_count < 2)
- return 0;
-
- turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0);
- min_pss_ctl = convert_to_native_pstate_format(cpu,
- cpu->acpi_perf_data.state_count - 1);
- /* Check if there is a turbo freq in _PSS */
- if (turbo_pss_ctl <= cpu->pstate.max_pstate &&
- turbo_pss_ctl > cpu->pstate.min_pstate) {
- pr_debug("intel_pstate: no turbo range exists in _PSS\n");
- limits->no_turbo = limits->turbo_disabled = 1;
- cpu->pstate.turbo_pstate = cpu->pstate.max_pstate;
- turbo_absent = true;
- }
-
- /* Check if the max non turbo p state < Intel P state max */
- max_pstate_index = turbo_absent ? 0 : 1;
- max_pss_ctl = convert_to_native_pstate_format(cpu, max_pstate_index);
- if (max_pss_ctl < cpu->pstate.max_pstate &&
- max_pss_ctl > cpu->pstate.min_pstate)
- cpu->pstate.max_pstate = max_pss_ctl;
-
- /* check If min perf > Intel P State min */
- if (min_pss_ctl > cpu->pstate.min_pstate &&
- min_pss_ctl < cpu->pstate.max_pstate) {
- cpu->pstate.min_pstate = min_pss_ctl;
- policy->cpuinfo.min_freq = min_pss_ctl * cpu->pstate.scaling;
- }
-
- if (turbo_absent)
- policy->cpuinfo.max_freq = cpu->pstate.max_pstate *
- cpu->pstate.scaling;
- else {
- policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate *
- cpu->pstate.scaling;
- /*
- * The _PSS table doesn't contain whole turbo frequency range.
- * This just contains +1 MHZ above the max non turbo frequency,
- * with control value corresponding to max turbo ratio. But
- * when cpufreq set policy is called, it will call with this
- * max frequency, which will cause a reduced performance as
- * this driver uses real max turbo frequency as the max
- * frequeny. So correct this frequency in _PSS table to
- * correct max turbo frequency based on the turbo ratio.
- * Also need to convert to MHz as _PSS freq is in MHz.
- */
- cpu->acpi_perf_data.states[0].core_frequency =
- turbo_pss_ctl * 100;
- }
-
- pr_debug("intel_pstate: Updated limits using _PSS 0x%x 0x%x 0x%x\n",
- cpu->pstate.min_pstate, cpu->pstate.max_pstate,
- cpu->pstate.turbo_pstate);
- pr_debug("intel_pstate: policy max_freq=%d Khz min_freq = %d KHz\n",
- policy->cpuinfo.max_freq, policy->cpuinfo.min_freq);
-
- return 0;
-}
-
-static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
-{
- struct cpudata *cpu;
-
- if (!no_acpi_perf)
- return 0;
-
- cpu = all_cpu_data[policy->cpu];
- acpi_processor_unregister_performance(policy->cpu);
- return 0;
-}
-
-#else
-static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy)
-{
- return 0;
-}
-
-static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
-{
- return 0;
-}
-#endif
-
static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
int deadband, int integral) {
pid->setpoint = setpoint;
@@ -687,31 +528,31 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
}
-static int byt_get_min_pstate(void)
+static int atom_get_min_pstate(void)
{
u64 value;
- rdmsrl(BYT_RATIOS, value);
+ rdmsrl(ATOM_RATIOS, value);
return (value >> 8) & 0x7F;
}
-static int byt_get_max_pstate(void)
+static int atom_get_max_pstate(void)
{
u64 value;
- rdmsrl(BYT_RATIOS, value);
+ rdmsrl(ATOM_RATIOS, value);
return (value >> 16) & 0x7F;
}
-static int byt_get_turbo_pstate(void)
+static int atom_get_turbo_pstate(void)
{
u64 value;
- rdmsrl(BYT_TURBO_RATIOS, value);
+ rdmsrl(ATOM_TURBO_RATIOS, value);
return value & 0x7F;
}
-static void byt_set_pstate(struct cpudata *cpudata, int pstate)
+static void atom_set_pstate(struct cpudata *cpudata, int pstate)
{
u64 val;
int32_t vid_fp;
@@ -736,27 +577,42 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate)
wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
}
-#define BYT_BCLK_FREQS 5
-static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800};
-
-static int byt_get_scaling(void)
+static int silvermont_get_scaling(void)
{
u64 value;
int i;
+ /* Defined in Table 35-6 from SDM (Sept 2015) */
+ static int silvermont_freq_table[] = {
+ 83300, 100000, 133300, 116700, 80000};
rdmsrl(MSR_FSB_FREQ, value);
- i = value & 0x3;
+ i = value & 0x7;
+ WARN_ON(i > 4);
- BUG_ON(i > BYT_BCLK_FREQS);
+ return silvermont_freq_table[i];
+}
- return byt_freq_table[i] * 100;
+static int airmont_get_scaling(void)
+{
+ u64 value;
+ int i;
+ /* Defined in Table 35-10 from SDM (Sept 2015) */
+ static int airmont_freq_table[] = {
+ 83300, 100000, 133300, 116700, 80000,
+ 93300, 90000, 88900, 87500};
+
+ rdmsrl(MSR_FSB_FREQ, value);
+ i = value & 0xF;
+ WARN_ON(i > 8);
+
+ return airmont_freq_table[i];
}
-static void byt_get_vid(struct cpudata *cpudata)
+static void atom_get_vid(struct cpudata *cpudata)
{
u64 value;
- rdmsrl(BYT_VIDS, value);
+ rdmsrl(ATOM_VIDS, value);
cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
cpudata->vid.ratio = div_fp(
@@ -764,7 +620,7 @@ static void byt_get_vid(struct cpudata *cpudata)
int_tofp(cpudata->pstate.max_pstate -
cpudata->pstate.min_pstate));
- rdmsrl(BYT_TURBO_VIDS, value);
+ rdmsrl(ATOM_TURBO_VIDS, value);
cpudata->vid.turbo = value & 0x7f;
}
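Decoding MSR_FSB_FREQ against the new tables: the low bits index a bus-clock list in kHz (3-bit field, 5 entries on Silvermont; 4-bit field, 9 entries on Airmont). A worked example, assuming a raw MSR value of 0x2 on Silvermont:

	u64 value = 0x2;			/* assumed raw MSR read */
	int i = value & 0x7;			/* Silvermont: 3-bit index */
	int khz = silvermont_freq_table[i];	/* 133300 -> 133.3 MHz bus */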
@@ -885,7 +741,7 @@ static struct cpu_defaults core_params = {
},
};
-static struct cpu_defaults byt_params = {
+static struct cpu_defaults silvermont_params = {
.pid_policy = {
.sample_rate_ms = 10,
.deadband = 0,
@@ -895,13 +751,33 @@ static struct cpu_defaults byt_params = {
.i_gain_pct = 4,
},
.funcs = {
- .get_max = byt_get_max_pstate,
- .get_max_physical = byt_get_max_pstate,
- .get_min = byt_get_min_pstate,
- .get_turbo = byt_get_turbo_pstate,
- .set = byt_set_pstate,
- .get_scaling = byt_get_scaling,
- .get_vid = byt_get_vid,
+ .get_max = atom_get_max_pstate,
+ .get_max_physical = atom_get_max_pstate,
+ .get_min = atom_get_min_pstate,
+ .get_turbo = atom_get_turbo_pstate,
+ .set = atom_set_pstate,
+ .get_scaling = silvermont_get_scaling,
+ .get_vid = atom_get_vid,
+ },
+};
+
+static struct cpu_defaults airmont_params = {
+ .pid_policy = {
+ .sample_rate_ms = 10,
+ .deadband = 0,
+ .setpoint = 60,
+ .p_gain_pct = 14,
+ .d_gain_pct = 0,
+ .i_gain_pct = 4,
+ },
+ .funcs = {
+ .get_max = atom_get_max_pstate,
+ .get_max_physical = atom_get_max_pstate,
+ .get_min = atom_get_min_pstate,
+ .get_turbo = atom_get_turbo_pstate,
+ .set = atom_set_pstate,
+ .get_scaling = airmont_get_scaling,
+ .get_vid = atom_get_vid,
},
};
@@ -938,23 +814,12 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
* policy, or by cpu specific default values determined through
* experimentation.
*/
- if (limits->max_perf_ctl && limits->max_sysfs_pct >=
- limits->max_policy_pct) {
- *max = limits->max_perf_ctl;
- } else {
- max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf),
- limits->max_perf));
- *max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate,
- cpu->pstate.turbo_pstate);
- }
+ max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf));
+ *max = clamp_t(int, max_perf_adj,
+ cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
- if (limits->min_perf_ctl) {
- *min = limits->min_perf_ctl;
- } else {
- min_perf = fp_toint(mul_fp(int_tofp(max_perf),
- limits->min_perf));
- *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
- }
+ min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf));
+ *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
}
static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
@@ -1153,7 +1018,7 @@ static void intel_pstate_timer_func(unsigned long __data)
static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
ICPU(0x2a, core_params),
ICPU(0x2d, core_params),
- ICPU(0x37, byt_params),
+ ICPU(0x37, silvermont_params),
ICPU(0x3a, core_params),
ICPU(0x3c, core_params),
ICPU(0x3d, core_params),
@@ -1162,7 +1027,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
ICPU(0x45, core_params),
ICPU(0x46, core_params),
ICPU(0x47, core_params),
- ICPU(0x4c, byt_params),
+ ICPU(0x4c, airmont_params),
ICPU(0x4e, core_params),
ICPU(0x4f, core_params),
ICPU(0x5e, core_params),
@@ -1229,12 +1094,6 @@ static unsigned int intel_pstate_get(unsigned int cpu_num)
static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
-#if IS_ENABLED(CONFIG_ACPI)
- struct cpudata *cpu;
- int i;
-#endif
- pr_debug("intel_pstate: %s max %u policy->max %u\n", __func__,
- policy->cpuinfo.max_freq, policy->max);
if (!policy->cpuinfo.max_freq)
return -ENODEV;
@@ -1242,6 +1101,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
policy->max >= policy->cpuinfo.max_freq) {
pr_debug("intel_pstate: set performance\n");
limits = &performance_limits;
+ if (hwp_active)
+ intel_pstate_hwp_set();
return 0;
}
@@ -1249,7 +1110,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
limits = &powersave_limits;
limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
- limits->max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq;
+ limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
+ policy->cpuinfo.max_freq);
limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0 , 100);
/* Normalize user input to [min_policy_pct, max_policy_pct] */
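A worked example of why max_policy_pct now rounds up: with cpuinfo.max_freq at 3200000 kHz and a requested policy->max of 3100000 kHz, truncating division yields 96 (from 96.875), silently capping below the request, while DIV_ROUND_UP returns 97:

	int pct = DIV_ROUND_UP(3100000 * 100, 3200000);	/* 97, not 96 */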
@@ -1261,6 +1123,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
limits->max_sysfs_pct);
limits->max_perf_pct = max(limits->min_policy_pct,
limits->max_perf_pct);
+ limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
/* Make sure min_perf_pct <= max_perf_pct */
limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
@@ -1270,23 +1133,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
int_tofp(100));
-#if IS_ENABLED(CONFIG_ACPI)
- cpu = all_cpu_data[policy->cpu];
- for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
- int control;
-
- control = convert_to_native_pstate_format(cpu, i);
- if (control * cpu->pstate.scaling == policy->max)
- limits->max_perf_ctl = control;
- if (control * cpu->pstate.scaling == policy->min)
- limits->min_perf_ctl = control;
- }
-
- pr_debug("intel_pstate: max %u policy_max %u perf_ctl [0x%x-0x%x]\n",
- policy->cpuinfo.max_freq, policy->max, limits->min_perf_ctl,
- limits->max_perf_ctl);
-#endif
-
if (hwp_active)
intel_pstate_hwp_set();
@@ -1341,30 +1187,18 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
policy->cpuinfo.max_freq =
cpu->pstate.turbo_pstate * cpu->pstate.scaling;
- if (!no_acpi_perf)
- intel_pstate_init_perf_limits(policy);
- /*
- * If there is no acpi perf data or error, we ignore and use Intel P
- * state calculated limits, So this is not fatal error.
- */
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
cpumask_set_cpu(policy->cpu, policy->cpus);
return 0;
}
-static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
-{
- return intel_pstate_exit_perf_limits(policy);
-}
-
static struct cpufreq_driver intel_pstate_driver = {
.flags = CPUFREQ_CONST_LOOPS,
.verify = intel_pstate_verify_policy,
.setpolicy = intel_pstate_set_policy,
.get = intel_pstate_get,
.init = intel_pstate_cpu_init,
- .exit = intel_pstate_cpu_exit,
.stop_cpu = intel_pstate_stop_cpu,
.name = "intel_pstate",
};
@@ -1406,6 +1240,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
}
#if IS_ENABLED(CONFIG_ACPI)
+#include <acpi/processor.h>
static bool intel_pstate_no_acpi_pss(void)
{
@@ -1601,9 +1436,6 @@ static int __init intel_pstate_setup(char *str)
force_load = 1;
if (!strcmp(str, "hwp_only"))
hwp_only = 1;
- if (!strcmp(str, "no_acpi"))
- no_acpi_perf = 1;
-
return 0;
}
early_param("intel_pstate", intel_pstate_setup);
diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c
index 733aa5153e74..68ef8fd9482f 100644
--- a/drivers/cpufreq/s3c24xx-cpufreq.c
+++ b/drivers/cpufreq/s3c24xx-cpufreq.c
@@ -648,7 +648,7 @@ late_initcall(s3c_cpufreq_initcall);
*
* Register the given set of PLLs with the system.
*/
-int __init s3c_plltab_register(struct cpufreq_frequency_table *plls,
+int s3c_plltab_register(struct cpufreq_frequency_table *plls,
unsigned int plls_no)
{
struct cpufreq_frequency_table *vals;
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index 2c3b16fd3a01..de5e89b2eaaa 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -31,7 +31,7 @@ static struct scpi_ops *scpi_ops;
static struct scpi_dvfs_info *scpi_get_dvfs_info(struct device *cpu_dev)
{
- u8 domain = topology_physical_package_id(cpu_dev->id);
+ int domain = topology_physical_package_id(cpu_dev->id);
if (domain < 0)
return ERR_PTR(-EINVAL);
diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c
index 73ef49922788..7038f364acb5 100644
--- a/drivers/crypto/nx/nx-aes-ccm.c
+++ b/drivers/crypto/nx/nx-aes-ccm.c
@@ -409,7 +409,7 @@ static int ccm_nx_decrypt(struct aead_request *req,
processed += to_process;
} while (processed < nbytes);
- rc = memcmp(csbcpb->cpb.aes_ccm.out_pat_or_mac, priv->oauth_tag,
+ rc = crypto_memneq(csbcpb->cpb.aes_ccm.out_pat_or_mac, priv->oauth_tag,
authsize) ? -EBADMSG : 0;
out:
spin_unlock_irqrestore(&nx_ctx->lock, irq_flags);
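memcmp() bails out at the first differing byte, so a forged tag's rejection latency leaks how many leading bytes were correct; crypto_memneq() inspects every byte unconditionally. The same substitution is applied in nx-aes-gcm and talitos below. A hedged sketch of the constant-time core:

static unsigned long memneq_sketch(const u8 *a, const u8 *b, size_t n)
{
	unsigned long neq = 0;

	while (n--)
		neq |= *a++ ^ *b++;	/* accumulate; no early exit */
	return neq;			/* zero iff the buffers match */
}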
diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c
index eee624f589b6..abd465f479c4 100644
--- a/drivers/crypto/nx/nx-aes-gcm.c
+++ b/drivers/crypto/nx/nx-aes-gcm.c
@@ -21,6 +21,7 @@
#include <crypto/internal/aead.h>
#include <crypto/aes.h>
+#include <crypto/algapi.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -418,7 +419,7 @@ mac:
itag, req->src, req->assoclen + nbytes,
crypto_aead_authsize(crypto_aead_reqtfm(req)),
SCATTERWALK_FROM_SG);
- rc = memcmp(itag, otag,
+ rc = crypto_memneq(itag, otag,
crypto_aead_authsize(crypto_aead_reqtfm(req))) ?
-EBADMSG : 0;
}
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index da2d6777bd09..97a364694bfc 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -515,7 +515,7 @@ static int __init padlock_init(void)
if (!x86_match_cpu(padlock_cpu_id))
return -ENODEV;
- if (!cpu_has_xcrypt_enabled) {
+ if (!boot_cpu_has(X86_FEATURE_XCRYPT_EN)) {
printk(KERN_NOTICE PFX "VIA PadLock detected, but not enabled. Hmm, strange...\n");
return -ENODEV;
}
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index 4e154c9b9206..8c5f90647b7a 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -540,7 +540,7 @@ static int __init padlock_init(void)
struct shash_alg *sha1;
struct shash_alg *sha256;
- if (!x86_match_cpu(padlock_sha_ids) || !cpu_has_phe_enabled)
+ if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
return -ENODEV;
/* Register the newly added algorithm module if on *
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
index 03856ad280b9..473d36d91644 100644
--- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c
+++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
@@ -198,7 +198,7 @@ static int adf_copy_key_value_data(struct adf_accel_dev *accel_dev,
goto out_err;
}
- params_head = section_head->params;
+ params_head = section.params;
while (params_head) {
if (copy_from_user(&key_val, (void __user *)params_head,
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 46f531e19ccf..b6f9f42e2985 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -977,7 +977,7 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev,
} else
oicv = (char *)&edesc->link_tbl[0];
- err = memcmp(oicv, icv, authsize) ? -EBADMSG : 0;
+ err = crypto_memneq(oicv, icv, authsize) ? -EBADMSG : 0;
}
kfree(edesc);
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 4e55239c7a30..53d22eb73b56 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -729,8 +729,8 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
return NULL;
dev_info(chan2dev(chan),
- "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n",
- __func__, xt->src_start, xt->dst_start, xt->numf,
+ "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n",
+ __func__, &xt->src_start, &xt->dst_start, xt->numf,
xt->frame_size, flags);
/*
@@ -824,8 +824,8 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
u32 ctrla;
u32 ctrlb;
- dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d0x%x s0x%x l0x%zx f0x%lx\n",
- dest, src, len, flags);
+ dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d%pad s%pad l0x%zx f0x%lx\n",
+ &dest, &src, len, flags);
if (unlikely(!len)) {
dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
@@ -938,8 +938,8 @@ atc_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
void __iomem *vaddr;
dma_addr_t paddr;
- dev_vdbg(chan2dev(chan), "%s: d0x%x v0x%x l0x%zx f0x%lx\n", __func__,
- dest, value, len, flags);
+ dev_vdbg(chan2dev(chan), "%s: d%pad v0x%x l0x%zx f0x%lx\n", __func__,
+ &dest, value, len, flags);
if (unlikely(!len)) {
dev_dbg(chan2dev(chan), "%s: length is zero!\n", __func__);
@@ -1022,8 +1022,8 @@ atc_prep_dma_memset_sg(struct dma_chan *chan,
dma_addr_t dest = sg_dma_address(sg);
size_t len = sg_dma_len(sg);
- dev_vdbg(chan2dev(chan), "%s: d0x%08x, l0x%zx\n",
- __func__, dest, len);
+ dev_vdbg(chan2dev(chan), "%s: d%pad, l0x%zx\n",
+ __func__, &dest, len);
if (!is_dma_fill_aligned(chan->device, dest, 0, len)) {
dev_err(chan2dev(chan), "%s: buffer is not aligned\n",
@@ -1439,9 +1439,9 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
unsigned int periods = buf_len / period_len;
unsigned int i;
- dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@0x%08x - %d (%d/%d)\n",
+ dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@%pad - %d (%d/%d)\n",
direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
- buf_addr,
+ &buf_addr,
periods, buf_len, period_len);
if (unlikely(!atslave || !buf_len || !period_len)) {
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
index d1cfc8c876f9..7f58f06157f6 100644
--- a/drivers/dma/at_hdmac_regs.h
+++ b/drivers/dma/at_hdmac_regs.h
@@ -385,9 +385,9 @@ static void vdbg_dump_regs(struct at_dma_chan *atchan) {}
static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli)
{
dev_crit(chan2dev(&atchan->chan_common),
- " desc: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n",
- lli->saddr, lli->daddr,
- lli->ctrla, lli->ctrlb, lli->dscr);
+ " desc: s%pad d%pad ctrl0x%x:0x%x l0x%pad\n",
+ &lli->saddr, &lli->daddr,
+ lli->ctrla, lli->ctrlb, &lli->dscr);
}
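dma_addr_t is 32 or 64 bits wide depending on configuration, so printing it with "0x%08x" truncates on LPAE/64-bit-DMA builds and trips format warnings; %pad prints it at native width and, note, takes a pointer to the value:

	dma_addr_t dest = 0x12345678;

	dev_dbg(dev, "dest=%pad\n", &dest);	/* pass &dest, not dest */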
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index b5e132d4bae5..370c661c7d7b 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -156,7 +156,7 @@
#define AT_XDMAC_CC_WRIP (0x1 << 23) /* Write in Progress (read only) */
#define AT_XDMAC_CC_WRIP_DONE (0x0 << 23)
#define AT_XDMAC_CC_WRIP_IN_PROGRESS (0x1 << 23)
-#define AT_XDMAC_CC_PERID(i) (0x7f & (h) << 24) /* Channel Peripheral Identifier */
+#define AT_XDMAC_CC_PERID(i)	((0x7f & (i)) << 24)	/* Channel Peripheral Identifier */
#define AT_XDMAC_CDS_MSP 0x2C /* Channel Data Stride Memory Set Pattern */
#define AT_XDMAC_CSUS 0x30 /* Channel Source Microblock Stride */
#define AT_XDMAC_CDUS 0x34 /* Channel Destination Microblock Stride */
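A precedence reminder relevant to the PERID macro: shifts bind tighter than bitwise AND, so 0x7f & (i) << 24 parses as 0x7f & ((i) << 24) and evaluates to 0 for every nonzero peripheral id; the mask must be applied before the shift. The GET_NUM_QDMACH fix in the edma driver below corrects the same class of slip.

	/* mask first, then shift into bits 24-30 */
#define CC_PERID_SKETCH(i)	((0x7f & (i)) << 24)
	/* e.g. CC_PERID_SKETCH(5) == 0x05000000 */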
@@ -920,8 +920,8 @@ at_xdmac_interleaved_queue_desc(struct dma_chan *chan,
desc->lld.mbr_cfg = chan_cc;
dev_dbg(chan2dev(chan),
- "%s: lld: mbr_sa=0x%08x, mbr_da=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
- __func__, desc->lld.mbr_sa, desc->lld.mbr_da,
+ "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
+ __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da,
desc->lld.mbr_ubc, desc->lld.mbr_cfg);
/* Chain lld. */
@@ -953,8 +953,8 @@ at_xdmac_prep_interleaved(struct dma_chan *chan,
if ((xt->numf > 1) && (xt->frame_size > 1))
return NULL;
- dev_dbg(chan2dev(chan), "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n",
- __func__, xt->src_start, xt->dst_start, xt->numf,
+ dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n",
+ __func__, &xt->src_start, &xt->dst_start, xt->numf,
xt->frame_size, flags);
src_addr = xt->src_start;
@@ -965,7 +965,9 @@ at_xdmac_prep_interleaved(struct dma_chan *chan,
NULL,
src_addr, dst_addr,
xt, xt->sgl);
- for (i = 0; i < xt->numf; i++)
+
+ /* Length of the block is (BLEN+1) microblocks. */
+ for (i = 0; i < xt->numf - 1; i++)
at_xdmac_increment_block_count(chan, first);
dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
@@ -1086,6 +1088,7 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
/* Check remaining length and change data width if needed. */
dwidth = at_xdmac_align_width(chan,
src_addr | dst_addr | xfer_size);
+ chan_cc &= ~AT_XDMAC_CC_DWIDTH_MASK;
chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);
ublen = xfer_size >> dwidth;
@@ -1179,8 +1182,8 @@ static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan,
desc->lld.mbr_cfg = chan_cc;
dev_dbg(chan2dev(chan),
- "%s: lld: mbr_da=0x%08x, mbr_ds=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
- __func__, desc->lld.mbr_da, desc->lld.mbr_ds, desc->lld.mbr_ubc,
+ "%s: lld: mbr_da=%pad, mbr_ds=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
+ __func__, &desc->lld.mbr_da, &desc->lld.mbr_ds, desc->lld.mbr_ubc,
desc->lld.mbr_cfg);
return desc;
@@ -1193,8 +1196,8 @@ at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
struct at_xdmac_desc *desc;
- dev_dbg(chan2dev(chan), "%s: dest=0x%08x, len=%d, pattern=0x%x, flags=0x%lx\n",
- __func__, dest, len, value, flags);
+ dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n",
+ __func__, &dest, len, value, flags);
if (unlikely(!len))
return NULL;
@@ -1229,8 +1232,8 @@ at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl,
/* Prepare descriptors. */
for_each_sg(sgl, sg, sg_len, i) {
- dev_dbg(chan2dev(chan), "%s: dest=0x%08x, len=%d, pattern=0x%x, flags=0x%lx\n",
- __func__, sg_dma_address(sg), sg_dma_len(sg),
+ dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n",
+ __func__, &sg_dma_address(sg), sg_dma_len(sg),
value, flags);
desc = at_xdmac_memset_create_desc(chan, atchan,
sg_dma_address(sg),
@@ -1333,7 +1336,7 @@ at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl,
* since we don't care about the stride anymore.
*/
if ((i == (sg_len - 1)) &&
- sg_dma_len(ppsg) == sg_dma_len(psg)) {
+ sg_dma_len(psg) == sg_dma_len(sg)) {
dev_dbg(chan2dev(chan),
"%s: desc 0x%p can be merged with desc 0x%p\n",
__func__, desc, pdesc);
diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
index c92d6a70ccf3..996c4b00d323 100644
--- a/drivers/dma/bcm2835-dma.c
+++ b/drivers/dma/bcm2835-dma.c
@@ -31,6 +31,7 @@
*/
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/interrupt.h>
@@ -62,6 +63,11 @@ struct bcm2835_dma_cb {
uint32_t pad[2];
};
+struct bcm2835_cb_entry {
+ struct bcm2835_dma_cb *cb;
+ dma_addr_t paddr;
+};
+
struct bcm2835_chan {
struct virt_dma_chan vc;
struct list_head node;
@@ -72,18 +78,18 @@ struct bcm2835_chan {
int ch;
struct bcm2835_desc *desc;
+ struct dma_pool *cb_pool;
void __iomem *chan_base;
int irq_number;
};
struct bcm2835_desc {
+ struct bcm2835_chan *c;
struct virt_dma_desc vd;
enum dma_transfer_direction dir;
- unsigned int control_block_size;
- struct bcm2835_dma_cb *control_block_base;
- dma_addr_t control_block_base_phys;
+ struct bcm2835_cb_entry *cb_list;
unsigned int frames;
size_t size;
@@ -143,10 +149,13 @@ static inline struct bcm2835_desc *to_bcm2835_dma_desc(
static void bcm2835_dma_desc_free(struct virt_dma_desc *vd)
{
struct bcm2835_desc *desc = container_of(vd, struct bcm2835_desc, vd);
- dma_free_coherent(desc->vd.tx.chan->device->dev,
- desc->control_block_size,
- desc->control_block_base,
- desc->control_block_base_phys);
+ int i;
+
+ for (i = 0; i < desc->frames; i++)
+ dma_pool_free(desc->c->cb_pool, desc->cb_list[i].cb,
+ desc->cb_list[i].paddr);
+
+ kfree(desc->cb_list);
kfree(desc);
}
@@ -199,7 +208,7 @@ static void bcm2835_dma_start_desc(struct bcm2835_chan *c)
c->desc = d = to_bcm2835_dma_desc(&vd->tx);
- writel(d->control_block_base_phys, c->chan_base + BCM2835_DMA_ADDR);
+ writel(d->cb_list[0].paddr, c->chan_base + BCM2835_DMA_ADDR);
writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS);
}
@@ -232,9 +241,16 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data)
static int bcm2835_dma_alloc_chan_resources(struct dma_chan *chan)
{
struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+ struct device *dev = c->vc.chan.device->dev;
+
+ dev_dbg(dev, "Allocating DMA channel %d\n", c->ch);
- dev_dbg(c->vc.chan.device->dev,
- "Allocating DMA channel %d\n", c->ch);
+ c->cb_pool = dma_pool_create(dev_name(dev), dev,
+ sizeof(struct bcm2835_dma_cb), 0, 0);
+ if (!c->cb_pool) {
+ dev_err(dev, "unable to allocate descriptor pool\n");
+ return -ENOMEM;
+ }
return request_irq(c->irq_number,
bcm2835_dma_callback, 0, "DMA IRQ", c);
@@ -246,6 +262,7 @@ static void bcm2835_dma_free_chan_resources(struct dma_chan *chan)
vchan_free_chan_resources(&c->vc);
free_irq(c->irq_number, c);
+ dma_pool_destroy(c->cb_pool);
dev_dbg(c->vc.chan.device->dev, "Freeing DMA channel %u\n", c->ch);
}
@@ -261,8 +278,7 @@ static size_t bcm2835_dma_desc_size_pos(struct bcm2835_desc *d, dma_addr_t addr)
size_t size;
for (size = i = 0; i < d->frames; i++) {
- struct bcm2835_dma_cb *control_block =
- &d->control_block_base[i];
+ struct bcm2835_dma_cb *control_block = d->cb_list[i].cb;
size_t this_size = control_block->length;
dma_addr_t dma;
@@ -343,6 +359,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic(
dma_addr_t dev_addr;
unsigned int es, sync_type;
unsigned int frame;
+ int i;
/* Grab configuration */
if (!is_slave_direction(direction)) {
@@ -374,27 +391,31 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic(
if (!d)
return NULL;
+ d->c = c;
d->dir = direction;
d->frames = buf_len / period_len;
- /* Allocate memory for control blocks */
- d->control_block_size = d->frames * sizeof(struct bcm2835_dma_cb);
- d->control_block_base = dma_zalloc_coherent(chan->device->dev,
- d->control_block_size, &d->control_block_base_phys,
- GFP_NOWAIT);
-
- if (!d->control_block_base) {
+ d->cb_list = kcalloc(d->frames, sizeof(*d->cb_list), GFP_KERNEL);
+ if (!d->cb_list) {
kfree(d);
return NULL;
}
+ /* Allocate memory for control blocks */
+ for (i = 0; i < d->frames; i++) {
+ struct bcm2835_cb_entry *cb_entry = &d->cb_list[i];
+
+ cb_entry->cb = dma_pool_zalloc(c->cb_pool, GFP_ATOMIC,
+ &cb_entry->paddr);
+ if (!cb_entry->cb)
+ goto error_cb;
+ }
/*
* Iterate over all frames, create a control block
* for each frame and link them together.
*/
for (frame = 0; frame < d->frames; frame++) {
- struct bcm2835_dma_cb *control_block =
- &d->control_block_base[frame];
+ struct bcm2835_dma_cb *control_block = d->cb_list[frame].cb;
/* Setup addresses */
if (d->dir == DMA_DEV_TO_MEM) {
@@ -428,12 +449,21 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic(
* This DMA engine driver currently only supports cyclic DMA.
* Therefore, wrap around at number of frames.
*/
- control_block->next = d->control_block_base_phys +
- sizeof(struct bcm2835_dma_cb)
- * ((frame + 1) % d->frames);
+ control_block->next = d->cb_list[((frame + 1) % d->frames)].paddr;
}
return vchan_tx_prep(&c->vc, &d->vd, flags);
+error_cb:
+ i--;
+ for (; i >= 0; i--) {
+ struct bcm2835_cb_entry *cb_entry = &d->cb_list[i];
+
+ dma_pool_free(c->cb_pool, cb_entry->cb, cb_entry->paddr);
+ }
+
+ kfree(d->cb_list);
+ kfree(d);
+ return NULL;
}
static int bcm2835_dma_slave_config(struct dma_chan *chan,
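Control blocks now come from a per-channel dma_pool rather than one large coherent allocation sized by frame count, so long cyclic transfers no longer need a high-order coherent buffer and blocks can be grabbed one at a time from atomic context. The pool API in brief, assuming the obvious pool/cb/paddr declarations:

	pool = dma_pool_create("cbs", dev,
			       sizeof(struct bcm2835_dma_cb), 0, 0);
	cb = dma_pool_zalloc(pool, GFP_ATOMIC, &paddr);	/* block + bus addr */
	/* hardware consumes the block through paddr */
	dma_pool_free(pool, cb, paddr);
	dma_pool_destroy(pool);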
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 6b03e4e84e6b..16fe773fb846 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -107,7 +107,7 @@
/* CCCFG register */
#define GET_NUM_DMACH(x) (x & 0x7) /* bits 0-2 */
-#define GET_NUM_QDMACH(x) (x & 0x70 >> 4) /* bits 4-6 */
+#define GET_NUM_QDMACH(x) ((x & 0x70) >> 4) /* bits 4-6 */
#define GET_NUM_PAENTRY(x) ((x & 0x7000) >> 12) /* bits 12-14 */
#define GET_NUM_EVQUE(x) ((x & 0x70000) >> 16) /* bits 16-18 */
#define GET_NUM_REGN(x) ((x & 0x300000) >> 20) /* bits 20-21 */
@@ -1565,7 +1565,7 @@ static void edma_tc_set_pm_state(struct edma_tc *tc, bool enable)
struct platform_device *tc_pdev;
int ret;
- if (!tc)
+ if (!IS_ENABLED(CONFIG_OF) || !tc)
return;
tc_pdev = of_find_device_by_node(tc->node);
@@ -1752,16 +1752,14 @@ static enum dma_status edma_tx_status(struct dma_chan *chan,
return ret;
}
-static bool edma_is_memcpy_channel(int ch_num, u16 *memcpy_channels)
+static bool edma_is_memcpy_channel(int ch_num, s32 *memcpy_channels)
{
- s16 *memcpy_ch = memcpy_channels;
-
if (!memcpy_channels)
return false;
- while (*memcpy_ch != -1) {
- if (*memcpy_ch == ch_num)
+ while (*memcpy_channels != -1) {
+ if (*memcpy_channels == ch_num)
return true;
- memcpy_ch++;
+ memcpy_channels++;
}
return false;
}
@@ -1775,7 +1773,7 @@ static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode)
{
struct dma_device *s_ddev = &ecc->dma_slave;
struct dma_device *m_ddev = NULL;
- s16 *memcpy_channels = ecc->info->memcpy_channels;
+ s32 *memcpy_channels = ecc->info->memcpy_channels;
int i, j;
dma_cap_zero(s_ddev->cap_mask);
@@ -1996,16 +1994,16 @@ static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev,
prop = of_find_property(dev->of_node, "ti,edma-memcpy-channels", &sz);
if (prop) {
const char pname[] = "ti,edma-memcpy-channels";
- size_t nelm = sz / sizeof(s16);
- s16 *memcpy_ch;
+ size_t nelm = sz / sizeof(s32);
+ s32 *memcpy_ch;
- memcpy_ch = devm_kcalloc(dev, nelm + 1, sizeof(s16),
+ memcpy_ch = devm_kcalloc(dev, nelm + 1, sizeof(s32),
GFP_KERNEL);
if (!memcpy_ch)
return ERR_PTR(-ENOMEM);
- ret = of_property_read_u16_array(dev->of_node, pname,
- (u16 *)memcpy_ch, nelm);
+ ret = of_property_read_u32_array(dev->of_node, pname,
+ (u32 *)memcpy_ch, nelm);
if (ret)
return ERR_PTR(ret);
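The switch from u16 to u32 storage matches how these properties are encoded: device-tree cells are 32-bit big-endian values, so the u16 accessor misreads each cell as two 16-bit halves. A hedged sketch of the corrected read, with a made-up two-entry property:

	s32 chans[3];	/* two entries + terminator */

	if (!of_property_read_u32_array(np, "ti,edma-memcpy-channels",
					(u32 *)chans, 2))
		chans[2] = -1;	/* sentinel for edma_is_memcpy_channel() */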
@@ -2017,31 +2015,50 @@ static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev,
&sz);
if (prop) {
const char pname[] = "ti,edma-reserved-slot-ranges";
+ u32 (*tmp)[2];
s16 (*rsv_slots)[2];
- size_t nelm = sz / sizeof(*rsv_slots);
+ size_t nelm = sz / sizeof(*tmp);
struct edma_rsv_info *rsv_info;
+ int i;
if (!nelm)
return info;
+ tmp = kcalloc(nelm, sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return ERR_PTR(-ENOMEM);
+
rsv_info = devm_kzalloc(dev, sizeof(*rsv_info), GFP_KERNEL);
- if (!rsv_info)
+ if (!rsv_info) {
+ kfree(tmp);
return ERR_PTR(-ENOMEM);
+ }
rsv_slots = devm_kcalloc(dev, nelm + 1, sizeof(*rsv_slots),
GFP_KERNEL);
- if (!rsv_slots)
+ if (!rsv_slots) {
+ kfree(tmp);
return ERR_PTR(-ENOMEM);
+ }
- ret = of_property_read_u16_array(dev->of_node, pname,
- (u16 *)rsv_slots, nelm * 2);
- if (ret)
+ ret = of_property_read_u32_array(dev->of_node, pname,
+ (u32 *)tmp, nelm * 2);
+ if (ret) {
+ kfree(tmp);
return ERR_PTR(ret);
+ }
+ for (i = 0; i < nelm; i++) {
+ rsv_slots[i][0] = tmp[i][0];
+ rsv_slots[i][1] = tmp[i][1];
+ }
rsv_slots[nelm][0] = -1;
rsv_slots[nelm][1] = -1;
+
info->rsv = rsv_info;
info->rsv->rsv_slots = (const s16 (*)[2])rsv_slots;
+
+ kfree(tmp);
}
return info;
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 7058d58ba588..0f6fd42f55ca 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1462,7 +1462,7 @@ err_firmware:
#define EVENT_REMAP_CELLS 3
-static int __init sdma_event_remap(struct sdma_engine *sdma)
+static int sdma_event_remap(struct sdma_engine *sdma)
{
struct device_node *np = sdma->dev->of_node;
struct device_node *gpr_np = of_parse_phandle(np, "gpr", 0);
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
index ebd8a5f398b0..f1bcc2a163b3 100644
--- a/drivers/dma/sh/usb-dmac.c
+++ b/drivers/dma/sh/usb-dmac.c
@@ -679,8 +679,11 @@ static int usb_dmac_runtime_suspend(struct device *dev)
struct usb_dmac *dmac = dev_get_drvdata(dev);
int i;
- for (i = 0; i < dmac->n_channels; ++i)
+ for (i = 0; i < dmac->n_channels; ++i) {
+ if (!dmac->channels[i].iomem)
+ break;
usb_dmac_chan_halt(&dmac->channels[i]);
+ }
return 0;
}
@@ -799,11 +802,10 @@ static int usb_dmac_probe(struct platform_device *pdev)
ret = pm_runtime_get_sync(&pdev->dev);
if (ret < 0) {
dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
- return ret;
+ goto error_pm;
}
ret = usb_dmac_init(dmac);
- pm_runtime_put(&pdev->dev);
if (ret) {
dev_err(&pdev->dev, "failed to reset device\n");
@@ -851,10 +853,13 @@ static int usb_dmac_probe(struct platform_device *pdev)
if (ret < 0)
goto error;
+ pm_runtime_put(&pdev->dev);
return 0;
error:
of_dma_controller_free(pdev->dev.of_node);
+ pm_runtime_put(&pdev->dev);
+error_pm:
pm_runtime_disable(&pdev->dev);
return ret;
}
diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c
index 9dfa2b0fa5da..9cb93c5b655d 100644
--- a/drivers/dma/xgene-dma.c
+++ b/drivers/dma/xgene-dma.c
@@ -29,6 +29,7 @@
#include <linux/dmapool.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/irq.h>
#include <linux/module.h>
#include <linux/of_device.h>
@@ -1610,6 +1611,7 @@ static int xgene_dma_request_irqs(struct xgene_dma *pdma)
/* Register DMA channel rx irq */
for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
chan = &pdma->chan[i];
+ irq_set_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY);
ret = devm_request_irq(chan->dev, chan->rx_irq,
xgene_dma_chan_ring_isr,
0, chan->name, chan);
@@ -1620,6 +1622,7 @@ static int xgene_dma_request_irqs(struct xgene_dma *pdma)
for (j = 0; j < i; j++) {
chan = &pdma->chan[i];
+ irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY);
devm_free_irq(chan->dev, chan->rx_irq, chan);
}
@@ -1640,6 +1643,7 @@ static void xgene_dma_free_irqs(struct xgene_dma *pdma)
for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
chan = &pdma->chan[i];
+ irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY);
devm_free_irq(chan->dev, chan->rx_irq, chan);
}
}
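IRQ_DISABLE_UNLAZY opts a line out of the kernel's lazy-disable optimization: by default disable_irq() only marks the interrupt disabled and masks it at the controller when another one actually arrives; the flag forces an immediate mask, which matters when one more stray interrupt could touch channel state being torn down. The pattern in brief, with assumed handler/chan names:

	irq_set_status_flags(irq, IRQ_DISABLE_UNLAZY);	/* mask eagerly */
	ret = devm_request_irq(dev, irq, handler, 0, "example", chan);
	/* on teardown: */
	irq_clear_status_flags(irq, IRQ_DISABLE_UNLAZY);
	devm_free_irq(dev, irq, chan);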
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index ac1ce4a73edf..0e08e665f715 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -521,6 +521,7 @@ static int __init dmi_present(const u8 *buf)
dmi_ver = smbios_ver;
else
dmi_ver = (buf[14] & 0xF0) << 4 | (buf[14] & 0x0F);
+ dmi_ver <<= 8;
dmi_num = get_unaligned_le16(buf + 12);
dmi_len = get_unaligned_le16(buf + 6);
dmi_base = get_unaligned_le32(buf + 8);
@@ -528,15 +529,14 @@ static int __init dmi_present(const u8 *buf)
if (dmi_walk_early(dmi_decode) == 0) {
if (smbios_ver) {
pr_info("SMBIOS %d.%d present.\n",
- dmi_ver >> 8, dmi_ver & 0xFF);
+ dmi_ver >> 16, (dmi_ver >> 8) & 0xFF);
} else {
smbios_entry_point_size = 15;
memcpy(smbios_entry_point, buf,
smbios_entry_point_size);
pr_info("Legacy DMI %d.%d present.\n",
- dmi_ver >> 8, dmi_ver & 0xFF);
+ dmi_ver >> 16, (dmi_ver >> 8) & 0xFF);
}
- dmi_ver <<= 8;
dmi_format_ids(dmi_ids_string, sizeof(dmi_ids_string));
printk(KERN_DEBUG "DMI: %s\n", dmi_ids_string);
return 0;
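After this change dmi_ver holds major.minor.docrev in three bytes from the start, rather than being shifted only after the banner is printed, hence the adjusted shifts in the print statements. For example:

	u32 dmi_ver = 0x020800;		/* SMBIOS 2.8, docrev 0 */

	pr_info("SMBIOS %d.%d present.\n",
		dmi_ver >> 16, (dmi_ver >> 8) & 0xFF);	/* "SMBIOS 2.8" */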
diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c
index a24f5cb877e0..953dc9195937 100644
--- a/drivers/fpga/fpga-mgr.c
+++ b/drivers/fpga/fpga-mgr.c
@@ -122,12 +122,10 @@ int fpga_mgr_firmware_load(struct fpga_manager *mgr, u32 flags,
}
ret = fpga_mgr_buf_load(mgr, flags, fw->data, fw->size);
- if (ret)
- return ret;
release_firmware(fw);
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(fpga_mgr_firmware_load);
@@ -256,7 +254,6 @@ int fpga_mgr_register(struct device *dev, const char *name,
void *priv)
{
struct fpga_manager *mgr;
- const char *dt_label;
int id, ret;
if (!mops || !mops->write_init || !mops->write ||
@@ -300,11 +297,9 @@ int fpga_mgr_register(struct device *dev, const char *name,
mgr->dev.id = id;
dev_set_drvdata(dev, mgr);
- dt_label = of_get_property(mgr->dev.of_node, "label", NULL);
- if (dt_label)
- ret = dev_set_name(&mgr->dev, "%s", dt_label);
- else
- ret = dev_set_name(&mgr->dev, "fpga%d", id);
+ ret = dev_set_name(&mgr->dev, "fpga%d", id);
+ if (ret)
+ goto error_device;
ret = device_add(&mgr->dev);
if (ret)
diff --git a/drivers/gpio/gpio-74xx-mmio.c b/drivers/gpio/gpio-74xx-mmio.c
index 6ed7c0fb3378..6b186829087c 100644
--- a/drivers/gpio/gpio-74xx-mmio.c
+++ b/drivers/gpio/gpio-74xx-mmio.c
@@ -113,13 +113,16 @@ static int mmio_74xx_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
static int mmio_74xx_gpio_probe(struct platform_device *pdev)
{
- const struct of_device_id *of_id =
- of_match_device(mmio_74xx_gpio_ids, &pdev->dev);
+ const struct of_device_id *of_id;
struct mmio_74xx_gpio_priv *priv;
struct resource *res;
void __iomem *dat;
int err;
+ of_id = of_match_device(mmio_74xx_gpio_ids, &pdev->dev);
+ if (!of_id)
+ return -ENODEV;
+
priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
diff --git a/drivers/gpio/gpio-ath79.c b/drivers/gpio/gpio-ath79.c
index e5827a56ff3b..5eaea8b812cf 100644
--- a/drivers/gpio/gpio-ath79.c
+++ b/drivers/gpio/gpio-ath79.c
@@ -113,7 +113,7 @@ static int ar934x_gpio_direction_output(struct gpio_chip *chip, unsigned offset,
__raw_writel(BIT(offset), ctrl->base + AR71XX_GPIO_REG_CLEAR);
__raw_writel(
- __raw_readl(ctrl->base + AR71XX_GPIO_REG_OE) & BIT(offset),
+ __raw_readl(ctrl->base + AR71XX_GPIO_REG_OE) & ~BIT(offset),
ctrl->base + AR71XX_GPIO_REG_OE);
spin_unlock_irqrestore(&ctrl->lock, flags);
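The one-character fix above is the classic read-modify-write rule: clearing a single bit takes reg & ~BIT(n), whereas reg & BIT(n) keeps only that bit and zeroes every other line's output-enable state. In isolation:

	u32 reg = __raw_readl(base + AR71XX_GPIO_REG_OE);

	__raw_writel(reg & ~BIT(offset), base + AR71XX_GPIO_REG_OE);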
diff --git a/drivers/gpio/gpio-generic.c b/drivers/gpio/gpio-generic.c
index bd5193c67a9c..88ae70ddb127 100644
--- a/drivers/gpio/gpio-generic.c
+++ b/drivers/gpio/gpio-generic.c
@@ -141,9 +141,9 @@ static int bgpio_get_set(struct gpio_chip *gc, unsigned int gpio)
unsigned long pinmask = bgc->pin2mask(bgc, gpio);
if (bgc->dir & pinmask)
- return bgc->read_reg(bgc->reg_set) & pinmask;
+ return !!(bgc->read_reg(bgc->reg_set) & pinmask);
else
- return bgc->read_reg(bgc->reg_dat) & pinmask;
+ return !!(bgc->read_reg(bgc->reg_dat) & pinmask);
}
static int bgpio_get(struct gpio_chip *gc, unsigned int gpio)
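gpiolib expects a .get callback to return 0/1 (or a negative errno), while the raw masked register value for, say, line 7 would be 0x80; the double negation collapses any set bit to exactly 1:

	int val = !!(readl(reg) & BIT(7));	/* 0 or 1, never 0x80 */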
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index 56d2d026e62e..f7fbb46d5d79 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -1122,8 +1122,6 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
/* MPUIO is a bit different, reading IRQ status clears it */
if (bank->is_mpuio) {
irqc->irq_ack = dummy_irq_chip.irq_ack;
- irqc->irq_mask = irq_gc_mask_set_bit;
- irqc->irq_unmask = irq_gc_mask_clr_bit;
if (!bank->regs->wkup_en)
irqc->irq_set_wake = NULL;
}
diff --git a/drivers/gpio/gpio-palmas.c b/drivers/gpio/gpio-palmas.c
index 171a6389f9ce..52b447c071cb 100644
--- a/drivers/gpio/gpio-palmas.c
+++ b/drivers/gpio/gpio-palmas.c
@@ -167,6 +167,8 @@ static int palmas_gpio_probe(struct platform_device *pdev)
const struct palmas_device_data *dev_data;
match = of_match_device(of_palmas_gpio_match, &pdev->dev);
+ if (!match)
+ return -ENODEV;
dev_data = match->data;
if (!dev_data)
dev_data = &palmas_dev_data;
diff --git a/drivers/gpio/gpio-syscon.c b/drivers/gpio/gpio-syscon.c
index 045a952576c7..7b25fdf64802 100644
--- a/drivers/gpio/gpio-syscon.c
+++ b/drivers/gpio/gpio-syscon.c
@@ -187,11 +187,15 @@ MODULE_DEVICE_TABLE(of, syscon_gpio_ids);
static int syscon_gpio_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- const struct of_device_id *of_id = of_match_device(syscon_gpio_ids, dev);
+ const struct of_device_id *of_id;
struct syscon_gpio_priv *priv;
struct device_node *np = dev->of_node;
int ret;
+ of_id = of_match_device(syscon_gpio_ids, dev);
+ if (!of_id)
+ return -ENODEV;
+
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index 027e5f47dd28..896bf29776b0 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -375,6 +375,60 @@ static int tegra_gpio_irq_set_wake(struct irq_data *d, unsigned int enable)
}
#endif
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+static int dbg_gpio_show(struct seq_file *s, void *unused)
+{
+ int i;
+ int j;
+
+ for (i = 0; i < tegra_gpio_bank_count; i++) {
+ for (j = 0; j < 4; j++) {
+ int gpio = tegra_gpio_compose(i, j, 0);
+ seq_printf(s,
+ "%d:%d %02x %02x %02x %02x %02x %02x %06x\n",
+ i, j,
+ tegra_gpio_readl(GPIO_CNF(gpio)),
+ tegra_gpio_readl(GPIO_OE(gpio)),
+ tegra_gpio_readl(GPIO_OUT(gpio)),
+ tegra_gpio_readl(GPIO_IN(gpio)),
+ tegra_gpio_readl(GPIO_INT_STA(gpio)),
+ tegra_gpio_readl(GPIO_INT_ENB(gpio)),
+ tegra_gpio_readl(GPIO_INT_LVL(gpio)));
+ }
+ }
+ return 0;
+}
+
+static int dbg_gpio_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, dbg_gpio_show, &inode->i_private);
+}
+
+static const struct file_operations debug_fops = {
+ .open = dbg_gpio_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void tegra_gpio_debuginit(void)
+{
+ (void) debugfs_create_file("tegra_gpio", S_IRUGO,
+ NULL, NULL, &debug_fops);
+}
+
+#else
+
+static inline void tegra_gpio_debuginit(void)
+{
+}
+
+#endif
+
static struct irq_chip tegra_gpio_irq_chip = {
.name = "GPIO",
.irq_ack = tegra_gpio_irq_ack,
@@ -519,6 +573,8 @@ static int tegra_gpio_probe(struct platform_device *pdev)
spin_lock_init(&bank->lvl_lock[j]);
}
+ tegra_gpio_debuginit();
+
return 0;
}
@@ -536,52 +592,3 @@ static int __init tegra_gpio_init(void)
return platform_driver_register(&tegra_gpio_driver);
}
postcore_initcall(tegra_gpio_init);
-
-#ifdef CONFIG_DEBUG_FS
-
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-static int dbg_gpio_show(struct seq_file *s, void *unused)
-{
- int i;
- int j;
-
- for (i = 0; i < tegra_gpio_bank_count; i++) {
- for (j = 0; j < 4; j++) {
- int gpio = tegra_gpio_compose(i, j, 0);
- seq_printf(s,
- "%d:%d %02x %02x %02x %02x %02x %02x %06x\n",
- i, j,
- tegra_gpio_readl(GPIO_CNF(gpio)),
- tegra_gpio_readl(GPIO_OE(gpio)),
- tegra_gpio_readl(GPIO_OUT(gpio)),
- tegra_gpio_readl(GPIO_IN(gpio)),
- tegra_gpio_readl(GPIO_INT_STA(gpio)),
- tegra_gpio_readl(GPIO_INT_ENB(gpio)),
- tegra_gpio_readl(GPIO_INT_LVL(gpio)));
- }
- }
- return 0;
-}
-
-static int dbg_gpio_open(struct inode *inode, struct file *file)
-{
- return single_open(file, dbg_gpio_show, &inode->i_private);
-}
-
-static const struct file_operations debug_fops = {
- .open = dbg_gpio_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int __init tegra_gpio_debuginit(void)
-{
- (void) debugfs_create_file("tegra_gpio", S_IRUGO,
- NULL, NULL, &debug_fops);
- return 0;
-}
-late_initcall(tegra_gpio_debuginit);
-#endif
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index a18f00fc1bb8..4e4c3083ae56 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -233,7 +233,7 @@ static struct gpio_desc *gpio_name_to_desc(const char * const name)
for (i = 0; i != chip->ngpio; ++i) {
struct gpio_desc *gpio = &chip->desc[i];
- if (!gpio->name)
+ if (!gpio->name || !name)
continue;
if (!strcmp(gpio->name, name)) {
@@ -1279,7 +1279,13 @@ static int _gpiod_get_raw_value(const struct gpio_desc *desc)
chip = desc->chip;
offset = gpio_chip_hwgpio(desc);
value = chip->get ? chip->get(chip, offset) : -EIO;
- value = value < 0 ? value : !!value;
+ /*
+ * FIXME: fix all drivers to clamp to [0,1] or return negative,
+ * then change this to:
+ * value = value < 0 ? value : !!value;
+ * so we can properly propagate error codes.
+ */
+ value = !!value;
trace_gpio_value(desc_to_gpio(desc), 1, value);
return value;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 615ce6d464fb..048cfe073dae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -389,7 +389,6 @@ struct amdgpu_clock {
* Fences.
*/
struct amdgpu_fence_driver {
- struct amdgpu_ring *ring;
uint64_t gpu_addr;
volatile uint32_t *cpu_addr;
/* sync_seq is protected by ring emission lock */
@@ -398,7 +397,7 @@ struct amdgpu_fence_driver {
bool initialized;
struct amdgpu_irq_src *irq_src;
unsigned irq_type;
- struct delayed_work lockup_work;
+ struct timer_list fallback_timer;
wait_queue_head_t fence_queue;
};
@@ -497,6 +496,7 @@ struct amdgpu_bo_va_mapping {
/* bo virtual addresses in a specific vm */
struct amdgpu_bo_va {
+ struct mutex mutex;
/* protected by bo being reserved */
struct list_head bo_list;
struct fence *last_pt_update;
@@ -539,6 +539,7 @@ struct amdgpu_bo {
/* Constant after initialization */
struct amdgpu_device *adev;
struct drm_gem_object gem_base;
+ struct amdgpu_bo *parent;
struct ttm_bo_kmap_obj dma_buf_vmap;
pid_t pid;
@@ -917,8 +918,8 @@ struct amdgpu_ring {
#define AMDGPU_VM_FAULT_STOP_ALWAYS 2
struct amdgpu_vm_pt {
- struct amdgpu_bo *bo;
- uint64_t addr;
+ struct amdgpu_bo *bo;
+ uint64_t addr;
};
struct amdgpu_vm_id {
@@ -926,13 +927,9 @@ struct amdgpu_vm_id {
uint64_t pd_gpu_addr;
/* last flushed PD/PT update */
struct fence *flushed_updates;
- /* last use of vmid */
- struct fence *last_id_use;
};
struct amdgpu_vm {
- struct mutex mutex;
-
struct rb_root va;
/* protecting invalidated */
@@ -957,24 +954,72 @@ struct amdgpu_vm {
/* for id and flush management per ring */
struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS];
+ /* for interval tree */
+ spinlock_t it_lock;
+ /* protecting freed */
+ spinlock_t freed_lock;
};
struct amdgpu_vm_manager {
- struct fence *active[AMDGPU_NUM_VM];
- uint32_t max_pfn;
+ struct {
+ struct fence *active;
+ atomic_long_t owner;
+ } ids[AMDGPU_NUM_VM];
+
+ uint32_t max_pfn;
/* number of VMIDs */
- unsigned nvm;
+ unsigned nvm;
/* vram base address for page table entry */
- u64 vram_base_offset;
+ u64 vram_base_offset;
/* is vm enabled? */
- bool enabled;
- /* for hw to save the PD addr on suspend/resume */
- uint32_t saved_table_addr[AMDGPU_NUM_VM];
+ bool enabled;
/* vm pte handling */
const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
struct amdgpu_ring *vm_pte_funcs_ring;
};
+void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct list_head *head);
+int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+ struct amdgpu_sync *sync);
+void amdgpu_vm_flush(struct amdgpu_ring *ring,
+ struct amdgpu_vm *vm,
+ struct fence *updates);
+void amdgpu_vm_fence(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct fence *fence);
+uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
+int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_sync *sync);
+int amdgpu_vm_bo_update(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ struct ttm_mem_reg *mem);
+void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo);
+int amdgpu_vm_bo_map(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t addr, uint64_t offset,
+ uint64_t size, uint32_t flags);
+int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t addr);
+void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va);
+int amdgpu_vm_free_job(struct amdgpu_job *job);
+
/*
* context related structures
*/
@@ -1211,6 +1256,7 @@ struct amdgpu_cs_parser {
/* relocations */
struct amdgpu_bo_list_entry *vm_bos;
struct list_head validated;
+ struct fence *fence;
struct amdgpu_ib *ibs;
uint32_t num_ibs;
@@ -1218,7 +1264,8 @@ struct amdgpu_cs_parser {
struct ww_acquire_ctx ticket;
/* user fence */
- struct amdgpu_user_fence uf;
+ struct amdgpu_user_fence uf;
+ struct amdgpu_bo_list_entry uf_entry;
};
struct amdgpu_job {
@@ -1226,7 +1273,7 @@ struct amdgpu_job {
struct amdgpu_device *adev;
struct amdgpu_ib *ibs;
uint32_t num_ibs;
- struct mutex job_lock;
+ void *owner;
struct amdgpu_user_fence uf;
int (*free_job)(struct amdgpu_job *job);
};
@@ -2257,11 +2304,6 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
bool amdgpu_card_posted(struct amdgpu_device *adev);
void amdgpu_update_display_priority(struct amdgpu_device *adev);
bool amdgpu_boot_test_post_card(struct amdgpu_device *adev);
-struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
- struct drm_file *filp,
- struct amdgpu_ctx *ctx,
- struct amdgpu_ib *ibs,
- uint32_t num_ibs);
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
@@ -2319,49 +2361,6 @@ long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg);
/*
- * vm
- */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct list_head *head);
-int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
- struct amdgpu_sync *sync);
-void amdgpu_vm_flush(struct amdgpu_ring *ring,
- struct amdgpu_vm *vm,
- struct fence *updates);
-void amdgpu_vm_fence(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_fence *fence);
-uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
-int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
-int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
- struct amdgpu_vm *vm);
-int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
- struct amdgpu_vm *vm, struct amdgpu_sync *sync);
-int amdgpu_vm_bo_update(struct amdgpu_device *adev,
- struct amdgpu_bo_va *bo_va,
- struct ttm_mem_reg *mem);
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
- struct amdgpu_bo *bo);
-struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
- struct amdgpu_bo *bo);
-struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_bo *bo);
-int amdgpu_vm_bo_map(struct amdgpu_device *adev,
- struct amdgpu_bo_va *bo_va,
- uint64_t addr, uint64_t offset,
- uint64_t size, uint32_t flags);
-int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
- struct amdgpu_bo_va *bo_va,
- uint64_t addr);
-void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
- struct amdgpu_bo_va *bo_va);
-int amdgpu_vm_free_job(struct amdgpu_job *job);
-/*
* functions used by amdgpu_encoder.c
*/
struct amdgpu_afmt_acr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dfc4d02c7a38..25a3e2485cc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -127,28 +127,35 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
return 0;
}
-struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
- struct drm_file *filp,
- struct amdgpu_ctx *ctx,
- struct amdgpu_ib *ibs,
- uint32_t num_ibs)
+static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_fence *fence_data)
{
- struct amdgpu_cs_parser *parser;
- int i;
+ struct drm_gem_object *gobj;
+ uint32_t handle;
+
+ handle = fence_data->handle;
+ gobj = drm_gem_object_lookup(p->adev->ddev, p->filp,
+ fence_data->handle);
+ if (gobj == NULL)
+ return -EINVAL;
+
+ p->uf.bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+ p->uf.offset = fence_data->offset;
- parser = kzalloc(sizeof(struct amdgpu_cs_parser), GFP_KERNEL);
- if (!parser)
- return NULL;
+ if (amdgpu_ttm_tt_has_userptr(p->uf.bo->tbo.ttm)) {
+ drm_gem_object_unreference_unlocked(gobj);
+ return -EINVAL;
+ }
- parser->adev = adev;
- parser->filp = filp;
- parser->ctx = ctx;
- parser->ibs = ibs;
- parser->num_ibs = num_ibs;
- for (i = 0; i < num_ibs; i++)
- ibs[i].ctx = ctx;
+ p->uf_entry.robj = amdgpu_bo_ref(p->uf.bo);
+ p->uf_entry.prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
+ p->uf_entry.allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ p->uf_entry.priority = 0;
+ p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
+ p->uf_entry.tv.shared = true;
- return parser;
+ drm_gem_object_unreference_unlocked(gobj);
+ return 0;
}
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
@@ -231,26 +238,15 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
case AMDGPU_CHUNK_ID_FENCE:
size = sizeof(struct drm_amdgpu_cs_chunk_fence);
- if (p->chunks[i].length_dw * sizeof(uint32_t) >= size) {
- uint32_t handle;
- struct drm_gem_object *gobj;
- struct drm_amdgpu_cs_chunk_fence *fence_data;
-
- fence_data = (void *)p->chunks[i].kdata;
- handle = fence_data->handle;
- gobj = drm_gem_object_lookup(p->adev->ddev,
- p->filp, handle);
- if (gobj == NULL) {
- ret = -EINVAL;
- goto free_partial_kdata;
- }
-
- p->uf.bo = gem_to_amdgpu_bo(gobj);
- p->uf.offset = fence_data->offset;
- } else {
+ if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
ret = -EINVAL;
goto free_partial_kdata;
}
+
+ ret = amdgpu_cs_user_fence_chunk(p, (void *)p->chunks[i].kdata);
+ if (ret)
+ goto free_partial_kdata;
+
break;
case AMDGPU_CHUNK_ID_DEPENDENCIES:
@@ -413,6 +409,9 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
p->vm_bos = amdgpu_vm_get_bos(p->adev, &fpriv->vm,
&p->validated);
+ if (p->uf.bo)
+ list_add(&p->uf_entry.tv.head, &p->validated);
+
if (need_mmap_lock)
down_read(&current->mm->mmap_sem);
@@ -463,8 +462,18 @@ static int cmp_size_smaller_first(void *priv, struct list_head *a,
return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}
-static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff)
+/**
+ * amdgpu_cs_parser_fini() - clean parser states
+ * @parser: parser structure holding parsing context.
+ * @error: error number
+ *
+ * If error is set, unreserve the buffers; otherwise just free the memory
+ * used by the parsing context.
+ **/
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
{
+ unsigned i;
+
if (!error) {
/* Sort the buffer list from the smallest to largest buffer,
* which affects the order of buffers in the LRU list.
@@ -479,17 +488,14 @@ static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int err
list_sort(NULL, &parser->validated, cmp_size_smaller_first);
ttm_eu_fence_buffer_objects(&parser->ticket,
- &parser->validated,
- &parser->ibs[parser->num_ibs-1].fence->base);
+ &parser->validated,
+ parser->fence);
} else if (backoff) {
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
}
-}
+ fence_put(parser->fence);
-static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
-{
- unsigned i;
if (parser->ctx)
amdgpu_ctx_put(parser->ctx);
if (parser->bo_list)
@@ -499,31 +505,12 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
for (i = 0; i < parser->nchunks; i++)
drm_free_large(parser->chunks[i].kdata);
kfree(parser->chunks);
- if (!amdgpu_enable_scheduler)
- {
- if (parser->ibs)
- for (i = 0; i < parser->num_ibs; i++)
- amdgpu_ib_free(parser->adev, &parser->ibs[i]);
- kfree(parser->ibs);
- if (parser->uf.bo)
- drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
- }
-
- kfree(parser);
-}
-
-/**
- * cs_parser_fini() - clean parser states
- * @parser: parser structure holding parsing context.
- * @error: error number
- *
- * If error is set than unvalidate buffer, otherwise just free memory
- * used by parsing context.
- **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
-{
- amdgpu_cs_parser_fini_early(parser, error, backoff);
- amdgpu_cs_parser_fini_late(parser);
+ if (parser->ibs)
+ for (i = 0; i < parser->num_ibs; i++)
+ amdgpu_ib_free(parser->adev, &parser->ibs[i]);
+ kfree(parser->ibs);
+ amdgpu_bo_unref(&parser->uf.bo);
+ amdgpu_bo_unref(&parser->uf_entry.robj);
}
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
@@ -610,15 +597,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
}
r = amdgpu_bo_vm_update_pte(parser, vm);
- if (r) {
- goto out;
- }
- amdgpu_cs_sync_rings(parser);
- if (!amdgpu_enable_scheduler)
- r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
- parser->filp);
+ if (!r)
+ amdgpu_cs_sync_rings(parser);
-out:
return r;
}
@@ -818,7 +799,7 @@ static int amdgpu_cs_free_job(struct amdgpu_job *job)
amdgpu_ib_free(job->adev, &job->ibs[i]);
kfree(job->ibs);
if (job->uf.bo)
- drm_gem_object_unreference_unlocked(&job->uf.bo->gem_base);
+ amdgpu_bo_unref(&job->uf.bo);
return 0;
}
@@ -826,38 +807,35 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
union drm_amdgpu_cs *cs = data;
- struct amdgpu_fpriv *fpriv = filp->driver_priv;
- struct amdgpu_vm *vm = &fpriv->vm;
- struct amdgpu_cs_parser *parser;
+ struct amdgpu_cs_parser parser = {};
bool reserved_buffers = false;
int i, r;
if (!adev->accel_working)
return -EBUSY;
- parser = amdgpu_cs_parser_create(adev, filp, NULL, NULL, 0);
- if (!parser)
- return -ENOMEM;
- r = amdgpu_cs_parser_init(parser, data);
+ parser.adev = adev;
+ parser.filp = filp;
+
+ r = amdgpu_cs_parser_init(&parser, data);
if (r) {
DRM_ERROR("Failed to initialize parser !\n");
- amdgpu_cs_parser_fini(parser, r, false);
+ amdgpu_cs_parser_fini(&parser, r, false);
r = amdgpu_cs_handle_lockup(adev, r);
return r;
}
- mutex_lock(&vm->mutex);
- r = amdgpu_cs_parser_relocs(parser);
+ r = amdgpu_cs_parser_relocs(&parser);
if (r == -ENOMEM)
DRM_ERROR("Not enough memory for command submission!\n");
else if (r && r != -ERESTARTSYS)
DRM_ERROR("Failed to process the buffer list %d!\n", r);
else if (!r) {
reserved_buffers = true;
- r = amdgpu_cs_ib_fill(adev, parser);
+ r = amdgpu_cs_ib_fill(adev, &parser);
}
if (!r) {
- r = amdgpu_cs_dependencies(adev, parser);
+ r = amdgpu_cs_dependencies(adev, &parser);
if (r)
DRM_ERROR("Failed in the dependencies handling %d!\n", r);
}
@@ -865,63 +843,71 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (r)
goto out;
- for (i = 0; i < parser->num_ibs; i++)
- trace_amdgpu_cs(parser, i);
+ for (i = 0; i < parser.num_ibs; i++)
+ trace_amdgpu_cs(&parser, i);
- r = amdgpu_cs_ib_vm_chunk(adev, parser);
+ r = amdgpu_cs_ib_vm_chunk(adev, &parser);
if (r)
goto out;
- if (amdgpu_enable_scheduler && parser->num_ibs) {
+ if (amdgpu_enable_scheduler && parser.num_ibs) {
+ struct amdgpu_ring * ring = parser.ibs->ring;
+ struct amd_sched_fence *fence;
struct amdgpu_job *job;
- struct amdgpu_ring * ring = parser->ibs->ring;
+
job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
if (!job) {
r = -ENOMEM;
goto out;
}
+
job->base.sched = &ring->sched;
- job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
- job->adev = parser->adev;
- job->ibs = parser->ibs;
- job->num_ibs = parser->num_ibs;
- job->base.owner = parser->filp;
- mutex_init(&job->job_lock);
+ job->base.s_entity = &parser.ctx->rings[ring->idx].entity;
+ job->adev = parser.adev;
+ job->owner = parser.filp;
+ job->free_job = amdgpu_cs_free_job;
+
+ job->ibs = parser.ibs;
+ job->num_ibs = parser.num_ibs;
+ parser.ibs = NULL;
+ parser.num_ibs = 0;
+
if (job->ibs[job->num_ibs - 1].user) {
- memcpy(&job->uf, &parser->uf,
- sizeof(struct amdgpu_user_fence));
+ job->uf = parser.uf;
job->ibs[job->num_ibs - 1].user = &job->uf;
+ parser.uf.bo = NULL;
}
- job->free_job = amdgpu_cs_free_job;
- mutex_lock(&job->job_lock);
- r = amd_sched_entity_push_job(&job->base);
- if (r) {
- mutex_unlock(&job->job_lock);
+ fence = amd_sched_fence_create(job->base.s_entity,
+ parser.filp);
+ if (!fence) {
+ r = -ENOMEM;
amdgpu_cs_free_job(job);
kfree(job);
goto out;
}
- cs->out.handle =
- amdgpu_ctx_add_fence(parser->ctx, ring,
- &job->base.s_fence->base);
- parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle;
+ job->base.s_fence = fence;
+ parser.fence = fence_get(&fence->base);
- list_sort(NULL, &parser->validated, cmp_size_smaller_first);
- ttm_eu_fence_buffer_objects(&parser->ticket,
- &parser->validated,
- &job->base.s_fence->base);
+ cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring,
+ &fence->base);
+ job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
- mutex_unlock(&job->job_lock);
- amdgpu_cs_parser_fini_late(parser);
- mutex_unlock(&vm->mutex);
- return 0;
+ trace_amdgpu_cs_ioctl(job);
+ amd_sched_entity_push_job(&job->base);
+
+ } else {
+ struct amdgpu_fence *fence;
+
+ r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs,
+ parser.filp);
+ fence = parser.ibs[parser.num_ibs - 1].fence;
+ parser.fence = fence_get(&fence->base);
+ cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence;
}
- cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
out:
- amdgpu_cs_parser_fini(parser, r, reserved_buffers);
- mutex_unlock(&vm->mutex);
+ amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
r = amdgpu_cs_handle_lockup(adev, r);
return r;
}
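
With the parser now on the stack, its lifetime bookkeeping is carried entirely by fence references: the ioctl takes fence_get() on whichever fence was produced, and amdgpu_cs_parser_fini() drops it with fence_put(). A toy, non-atomic model of that get/put discipline (struct toy_fence is a stand-in, not the kernel's struct fence):

#include <stdio.h>

struct toy_fence { int refcount; };

static struct toy_fence *get(struct toy_fence *f) { f->refcount++; return f; }

static void put(struct toy_fence *f)
{
	if (--f->refcount == 0)
		printf("fence released\n");
}

int main(void)
{
	struct toy_fence f = { .refcount = 1 };		/* producer's reference */
	struct toy_fence *parser_fence = get(&f);	/* parser.fence = fence_get(...) */

	put(parser_fence);	/* amdgpu_cs_parser_fini() */
	put(&f);		/* producer drops its own reference */
	return 0;
}
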
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index e173a5a02f0d..5580d3420c3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -73,6 +73,8 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
struct drm_crtc *crtc = &amdgpuCrtc->base;
unsigned long flags;
unsigned i;
+ int vpos, hpos, stat, min_udelay;
+ struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
amdgpu_flip_wait_fence(adev, &work->excl);
for (i = 0; i < work->shared_count; ++i)
@@ -81,6 +83,41 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
/* We borrow the event spin lock for protecting flip_status */
spin_lock_irqsave(&crtc->dev->event_lock, flags);
+ /* If this happens to execute within the "virtually extended" vblank
+ * interval before the start of the real vblank interval then it needs
+ * to delay programming the mmio flip until the real vblank is entered.
+ * This prevents completing a flip too early due to the way we fudge
+ * our vblank counter and vblank timestamps in order to work around the
+ * problem that the hw fires vblank interrupts before actual start of
+ * vblank (when line buffer refilling is done for a frame). It
+ * complements the fudging logic in amdgpu_get_crtc_scanoutpos() for
+ * timestamping and amdgpu_get_vblank_counter_kms() for vblank counts.
+ *
+ * In practice this won't execute very often unless on very fast
+ * machines because the time window for this to happen is very small.
+ */
+ for (;;) {
+ /* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
+ * start in hpos, and to the "fudged earlier" vblank start in
+ * vpos.
+ */
+ stat = amdgpu_get_crtc_scanoutpos(adev->ddev, work->crtc_id,
+ GET_DISTANCE_TO_VBLANKSTART,
+ &vpos, &hpos, NULL, NULL,
+ &crtc->hwmode);
+
+ if ((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
+ (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE) ||
+ !(vpos >= 0 && hpos <= 0))
+ break;
+
+ /* Sleep at least until estimated real start of hw vblank */
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+ min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
+ usleep_range(min_udelay, 2 * min_udelay);
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+ }
+
/* do the flip (mmio) */
adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
/* set the flip status */
@@ -109,7 +146,7 @@ static void amdgpu_unpin_work_func(struct work_struct *__work)
} else
DRM_ERROR("failed to reserve buffer after flip\n");
- drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
+ amdgpu_bo_unref(&work->old_rbo);
kfree(work->shared);
kfree(work);
}
@@ -148,8 +185,8 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
obj = old_amdgpu_fb->obj;
/* take a reference to the old object */
- drm_gem_object_reference(obj);
work->old_rbo = gem_to_amdgpu_bo(obj);
+ amdgpu_bo_ref(work->old_rbo);
new_amdgpu_fb = to_amdgpu_framebuffer(fb);
obj = new_amdgpu_fb->obj;
@@ -222,7 +259,7 @@ pflip_cleanup:
amdgpu_bo_unreserve(new_rbo);
cleanup:
- drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
+ amdgpu_bo_unref(&work->old_rbo);
fence_put(work->excl);
for (i = 0; i < work->shared_count; ++i)
fence_put(work->shared[i]);
@@ -712,6 +749,15 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
* \param dev Device to query.
* \param pipe Crtc to query.
* \param flags Flags from caller (DRM_CALLED_FROM_VBLIRQ or 0).
+ * For driver-internal use it also supports these flags:
+ *
+ * USE_REAL_VBLANKSTART to use the real start of vblank instead
+ * of a fudged earlier start of vblank.
+ *
+ * GET_DISTANCE_TO_VBLANKSTART to return distance to the
+ * fudged earlier start of vblank in *vpos and the distance
+ * to true start of vblank in *hpos.
+ *
* \param *vpos Location where vertical scanout position should be stored.
* \param *hpos Location where horizontal scanout position should go.
* \param *stime Target location for timestamp taken immediately before
@@ -776,10 +822,40 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
vbl_end = 0;
}
+ /* Called from driver internal vblank counter query code? */
+ if (flags & GET_DISTANCE_TO_VBLANKSTART) {
+ /* Caller wants distance from real vbl_start in *hpos */
+ *hpos = *vpos - vbl_start;
+ }
+
+ /* Fudge vblank to start a few scanlines earlier to handle the
+ * problem that vblank irqs fire a few scanlines before start
+ * of vblank. Some driver internal callers need the true vblank
+ * start to be used and signal this via the USE_REAL_VBLANKSTART flag.
+ *
+ * The cause of the "early" vblank irq is that the irq is triggered
+ * by the line buffer logic when the line buffer read position enters
+ * the vblank, whereas our crtc scanout position naturally lags the
+ * line buffer read position.
+ */
+ if (!(flags & USE_REAL_VBLANKSTART))
+ vbl_start -= adev->mode_info.crtcs[pipe]->lb_vblank_lead_lines;
+
/* Test scanout position against vblank region. */
if ((*vpos < vbl_start) && (*vpos >= vbl_end))
in_vbl = false;
+ /* In vblank? */
+ if (in_vbl)
+ ret |= DRM_SCANOUTPOS_IN_VBLANK;
+
+ /* Called from driver internal vblank counter query code? */
+ if (flags & GET_DISTANCE_TO_VBLANKSTART) {
+ /* Caller wants distance from fudged earlier vbl_start */
+ *vpos -= vbl_start;
+ return ret;
+ }
+
/* Check if inside vblank area and apply corrective offsets:
* vpos will then be >=0 in video scanout area, but negative
* within vblank area, counting down the number of lines until
@@ -795,32 +871,6 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
/* Correct for shifted end of vbl at vbl_end. */
*vpos = *vpos - vbl_end;
- /* In vblank? */
- if (in_vbl)
- ret |= DRM_SCANOUTPOS_IN_VBLANK;
-
- /* Is vpos outside nominal vblank area, but less than
- * 1/100 of a frame height away from start of vblank?
- * If so, assume this isn't a massively delayed vblank
- * interrupt, but a vblank interrupt that fired a few
- * microseconds before true start of vblank. Compensate
- * by adding a full frame duration to the final timestamp.
- * Happens, e.g., on ATI R500, R600.
- *
- * We only do this if DRM_CALLED_FROM_VBLIRQ.
- */
- if ((flags & DRM_CALLED_FROM_VBLIRQ) && !in_vbl) {
- vbl_start = mode->crtc_vdisplay;
- vtotal = mode->crtc_vtotal;
-
- if (vbl_start - *vpos < vtotal / 100) {
- *vpos -= vtotal;
-
- /* Signal this correction as "applied". */
- ret |= 0x8;
- }
- }
-
return ret;
}
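
The delay loop added to amdgpu_flip_work_func() above is a sleep-poll: rather than spinning, it sleeps an estimated number of line-times each pass until the scanout position reports that real vblank has started. A compressed user-space model of that shape (all numbers invented):

#include <stdio.h>
#include <unistd.h>

static int distance = -4;		/* lines until real vblank start */

static int query_distance(void) { return distance++; }	/* fake scanout query */

int main(void)
{
	int d;

	/* sleep roughly (-d + 1) line-times per pass, as the flip loop does */
	while ((d = query_distance()) < 0)
		usleep((unsigned int)(-d + 1) * 5);

	printf("program the flip, distance=%d\n", d);
	return 0;
}
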
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 257d72205bb5..3671f9f220bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -47,6 +47,9 @@
* that the relevant GPU caches have been flushed.
*/
+static struct kmem_cache *amdgpu_fence_slab;
+static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0);
+
/**
* amdgpu_fence_write - write a fence value
*
@@ -85,24 +88,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
}
/**
- * amdgpu_fence_schedule_check - schedule lockup check
- *
- * @ring: pointer to struct amdgpu_ring
- *
- * Queues a delayed work item to check for lockups.
- */
-static void amdgpu_fence_schedule_check(struct amdgpu_ring *ring)
-{
- /*
- * Do not reset the timer here with mod_delayed_work,
- * this can livelock in an interaction with TTM delayed destroy.
- */
- queue_delayed_work(system_power_efficient_wq,
- &ring->fence_drv.lockup_work,
- AMDGPU_FENCE_JIFFIES_TIMEOUT);
-}
-
-/**
* amdgpu_fence_emit - emit a fence on the requested ring
*
* @ring: ring the fence is associated with
@@ -118,7 +103,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
struct amdgpu_device *adev = ring->adev;
/* we are protected by the ring emission mutex */
- *fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+ *fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
if ((*fence) == NULL) {
return -ENOMEM;
}
@@ -132,11 +117,23 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
(*fence)->seq,
AMDGPU_FENCE_FLAG_INT);
- trace_amdgpu_fence_emit(ring->adev->ddev, ring->idx, (*fence)->seq);
return 0;
}
/**
+ * amdgpu_fence_schedule_fallback - schedule fallback check
+ *
+ * @ring: pointer to struct amdgpu_ring
+ *
+ * Start a timer as a fallback to our interrupts.
+ */
+static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
+{
+ mod_timer(&ring->fence_drv.fallback_timer,
+ jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
+}
+
+/**
* amdgpu_fence_activity - check for fence activity
*
* @ring: pointer to struct amdgpu_ring
@@ -202,45 +199,38 @@ static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
if (seq < last_emitted)
- amdgpu_fence_schedule_check(ring);
+ amdgpu_fence_schedule_fallback(ring);
return wake;
}
/**
- * amdgpu_fence_check_lockup - check for hardware lockup
+ * amdgpu_fence_process - process a fence
*
- * @work: delayed work item
+ * @adev: amdgpu_device pointer
+ * @ring: ring index the fence is associated with
*
- * Checks for fence activity and if there is none probe
- * the hardware if a lockup occured.
+ * Checks the current fence value and wakes the fence queue
+ * if the sequence number has increased (all asics).
*/
-static void amdgpu_fence_check_lockup(struct work_struct *work)
+void amdgpu_fence_process(struct amdgpu_ring *ring)
{
- struct amdgpu_fence_driver *fence_drv;
- struct amdgpu_ring *ring;
-
- fence_drv = container_of(work, struct amdgpu_fence_driver,
- lockup_work.work);
- ring = fence_drv->ring;
-
if (amdgpu_fence_activity(ring))
wake_up_all(&ring->fence_drv.fence_queue);
}
/**
- * amdgpu_fence_process - process a fence
+ * amdgpu_fence_fallback - fallback for hardware interrupts
*
- * @adev: amdgpu_device pointer
- * @ring: ring index the fence is associated with
+ * @work: delayed work item
*
- * Checks the current fence value and wakes the fence queue
- * if the sequence number has increased (all asics).
+ * Checks for fence activity.
*/
-void amdgpu_fence_process(struct amdgpu_ring *ring)
+static void amdgpu_fence_fallback(unsigned long arg)
{
- if (amdgpu_fence_activity(ring))
- wake_up_all(&ring->fence_drv.fence_queue);
+ struct amdgpu_ring *ring = (void *)arg;
+
+ amdgpu_fence_process(ring);
}
/**
@@ -290,7 +280,7 @@ static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
return 0;
- amdgpu_fence_schedule_check(ring);
+ amdgpu_fence_schedule_fallback(ring);
wait_event(ring->fence_drv.fence_queue, (
(signaled = amdgpu_fence_seq_signaled(ring, seq))));
@@ -491,9 +481,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
atomic64_set(&ring->fence_drv.last_seq, 0);
ring->fence_drv.initialized = false;
- INIT_DELAYED_WORK(&ring->fence_drv.lockup_work,
- amdgpu_fence_check_lockup);
- ring->fence_drv.ring = ring;
+ setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
+ (unsigned long)ring);
init_waitqueue_head(&ring->fence_drv.fence_queue);
@@ -536,6 +525,13 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
*/
int amdgpu_fence_driver_init(struct amdgpu_device *adev)
{
+ if (atomic_inc_return(&amdgpu_fence_slab_ref) == 1) {
+ amdgpu_fence_slab = kmem_cache_create(
+ "amdgpu_fence", sizeof(struct amdgpu_fence), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!amdgpu_fence_slab)
+ return -ENOMEM;
+ }
if (amdgpu_debugfs_fence_init(adev))
dev_err(adev->dev, "fence debugfs file creation failed\n");
@@ -554,9 +550,12 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
{
int i, r;
+ if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
+ kmem_cache_destroy(amdgpu_fence_slab);
mutex_lock(&adev->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
+
if (!ring || !ring->fence_drv.initialized)
continue;
r = amdgpu_fence_wait_empty(ring);
@@ -568,6 +567,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
amd_sched_fini(&ring->sched);
+ del_timer_sync(&ring->fence_drv.fallback_timer);
ring->fence_drv.initialized = false;
}
mutex_unlock(&adev->ring_lock);
@@ -751,18 +751,25 @@ static bool amdgpu_fence_enable_signaling(struct fence *f)
fence->fence_wake.func = amdgpu_fence_check_signaled;
__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
fence_get(f);
- amdgpu_fence_schedule_check(ring);
+ if (!timer_pending(&ring->fence_drv.fallback_timer))
+ amdgpu_fence_schedule_fallback(ring);
FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
return true;
}
+static void amdgpu_fence_release(struct fence *f)
+{
+ struct amdgpu_fence *fence = to_amdgpu_fence(f);
+ kmem_cache_free(amdgpu_fence_slab, fence);
+}
+
const struct fence_ops amdgpu_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_fence_get_timeline_name,
.enable_signaling = amdgpu_fence_enable_signaling,
.signaled = amdgpu_fence_is_signaled,
.wait = fence_default_wait,
- .release = NULL,
+ .release = amdgpu_fence_release,
};
/*
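
amdgpu_fence_slab_ref above is a shared-resource refcount: the first amdgpu_fence_driver_init() call creates the kmem_cache and the last amdgpu_fence_driver_fini() destroys it, so multiple GPUs share one slab. A single-threaded toy of the idea (the kernel uses atomic_inc_return()/atomic_dec_and_test(), which this sketch deliberately does not reproduce):

#include <stdio.h>

static int slab_ref;
static const char *slab;		/* stands in for the kmem_cache */

static int fence_init(void)
{
	if (++slab_ref == 1)		/* first device creates the cache */
		slab = "amdgpu_fence";	/* kmem_cache_create() would go here */
	return slab ? 0 : -12;		/* -ENOMEM if creation failed */
}

static void fence_fini(void)
{
	if (--slab_ref == 0)		/* last device destroys it */
		slab = NULL;
}

int main(void)
{
	fence_init();
	fence_init();			/* second GPU shares the cache */
	fence_fini();
	printf("still cached: %s\n", slab ? slab : "(none)");
	fence_fini();
	return 0;
}
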
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 087332858853..9c253c535d26 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -115,12 +115,9 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va *bo_va;
int r;
- mutex_lock(&vm->mutex);
r = amdgpu_bo_reserve(rbo, false);
- if (r) {
- mutex_unlock(&vm->mutex);
+ if (r)
return r;
- }
bo_va = amdgpu_vm_bo_find(vm, rbo);
if (!bo_va) {
@@ -129,7 +126,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri
++bo_va->ref_count;
}
amdgpu_bo_unreserve(rbo);
- mutex_unlock(&vm->mutex);
return 0;
}
@@ -142,10 +138,8 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va *bo_va;
int r;
- mutex_lock(&vm->mutex);
r = amdgpu_bo_reserve(rbo, true);
if (r) {
- mutex_unlock(&vm->mutex);
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r);
return;
@@ -157,7 +151,6 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
}
}
amdgpu_bo_unreserve(rbo);
- mutex_unlock(&vm->mutex);
}
static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
@@ -242,8 +235,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
AMDGPU_GEM_USERPTR_REGISTER))
return -EINVAL;
- if (!(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) ||
- !(args->flags & AMDGPU_GEM_USERPTR_REGISTER)) {
+ if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) && (
+ !(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) ||
+ !(args->flags & AMDGPU_GEM_USERPTR_REGISTER))) {
/* if we want to write to it we must require anonymous
memory and install an MMU notifier */
@@ -483,6 +477,17 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
if (domain == AMDGPU_GEM_DOMAIN_CPU)
goto error_unreserve;
}
+ list_for_each_entry(entry, &duplicates, head) {
+ domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type);
+ /* if anything is swapped out don't swap it in here,
+ just abort and wait for the next CS */
+ if (domain == AMDGPU_GEM_DOMAIN_CPU)
+ goto error_unreserve;
+ }
+
+ r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
+ if (r)
+ goto error_unreserve;
r = amdgpu_vm_clear_freed(adev, bo_va->vm);
if (r)
@@ -512,6 +517,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_bo *rbo;
struct amdgpu_bo_va *bo_va;
+ struct ttm_validate_buffer tv, tv_pd;
+ struct ww_acquire_ctx ticket;
+ struct list_head list, duplicates;
uint32_t invalid_flags, va_flags = 0;
int r = 0;
@@ -547,19 +555,28 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
gobj = drm_gem_object_lookup(dev, filp, args->handle);
if (gobj == NULL)
return -ENOENT;
- mutex_lock(&fpriv->vm.mutex);
rbo = gem_to_amdgpu_bo(gobj);
- r = amdgpu_bo_reserve(rbo, false);
+ INIT_LIST_HEAD(&list);
+ INIT_LIST_HEAD(&duplicates);
+ tv.bo = &rbo->tbo;
+ tv.shared = true;
+ list_add(&tv.head, &list);
+
+ if (args->operation == AMDGPU_VA_OP_MAP) {
+ tv_pd.bo = &fpriv->vm.page_directory->tbo;
+ tv_pd.shared = true;
+ list_add(&tv_pd.head, &list);
+ }
+ r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
if (r) {
- mutex_unlock(&fpriv->vm.mutex);
drm_gem_object_unreference_unlocked(gobj);
return r;
}
bo_va = amdgpu_vm_bo_find(&fpriv->vm, rbo);
if (!bo_va) {
- amdgpu_bo_unreserve(rbo);
- mutex_unlock(&fpriv->vm.mutex);
+ ttm_eu_backoff_reservation(&ticket, &list);
+ drm_gem_object_unreference_unlocked(gobj);
return -ENOENT;
}
@@ -581,10 +598,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
default:
break;
}
-
+ ttm_eu_backoff_reservation(&ticket, &list);
if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE))
amdgpu_gem_va_update_vm(adev, bo_va, args->operation);
- mutex_unlock(&fpriv->vm.mutex);
+
drm_gem_object_unreference_unlocked(gobj);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index e65987743871..9e25edafa721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -62,7 +62,7 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
int r;
if (size) {
- r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo,
+ r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
&ib->sa_bo, size, 256);
if (r) {
dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
@@ -216,7 +216,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
}
if (ib->vm)
- amdgpu_vm_fence(adev, ib->vm, ib->fence);
+ amdgpu_vm_fence(adev, ib->vm, &ib->fence->base);
amdgpu_ring_unlock_commit(ring);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 1618e2294a16..e23843f4d877 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -611,13 +611,59 @@ void amdgpu_driver_preclose_kms(struct drm_device *dev,
u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe)
{
struct amdgpu_device *adev = dev->dev_private;
+ int vpos, hpos, stat;
+ u32 count;
if (pipe >= adev->mode_info.num_crtc) {
DRM_ERROR("Invalid crtc %u\n", pipe);
return -EINVAL;
}
- return amdgpu_display_vblank_get_counter(adev, pipe);
+ /* The hw increments its frame counter at start of vsync, not at start
+ * of vblank, as is required by DRM core vblank counter handling.
+ * Cook the hw count here to make it appear to the caller as if it
+ * incremented at start of vblank. We measure distance to start of
+ * vblank in vpos. vpos therefore will be >= 0 between start of vblank
+ * and start of vsync, so vpos >= 0 means to bump the hw frame counter
+ * result by 1 to give the proper appearance to the caller.
+ */
+ if (adev->mode_info.crtcs[pipe]) {
+ /* Repeat readout if needed to provide stable result if
+ * we cross start of vsync during the queries.
+ */
+ do {
+ count = amdgpu_display_vblank_get_counter(adev, pipe);
+ /* Ask amdgpu_get_crtc_scanoutpos to return vpos as
+ * distance to start of vblank, instead of regular
+ * vertical scanout pos.
+ */
+ stat = amdgpu_get_crtc_scanoutpos(
+ dev, pipe, GET_DISTANCE_TO_VBLANKSTART,
+ &vpos, &hpos, NULL, NULL,
+ &adev->mode_info.crtcs[pipe]->base.hwmode);
+ } while (count != amdgpu_display_vblank_get_counter(adev, pipe));
+
+ if (((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
+ (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE))) {
+ DRM_DEBUG_VBL("Query failed! stat %d\n", stat);
+ } else {
+ DRM_DEBUG_VBL("crtc %d: dist from vblank start %d\n",
+ pipe, vpos);
+
+ /* Bump counter if we are at >= leading edge of vblank,
+ * but before vsync where vpos would turn negative and
+ * the hw counter really increments.
+ */
+ if (vpos >= 0)
+ count++;
+ }
+ } else {
+ /* Fallback to use value as is. */
+ count = amdgpu_display_vblank_get_counter(adev, pipe);
+ DRM_DEBUG_VBL("NULL mode info! Returned count may be wrong.\n");
+ }
+
+ return count;
}
/**
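
The do/while above is a torn-read guard: the frame counter is sampled before and after the scanout-position query, and the query repeats if the counter moved in between, so count and vpos are guaranteed to describe the same frame. Schematically:

#include <stdio.h>

static unsigned int hw_frame_counter;	/* stand-in for the MMIO counter */

static unsigned int read_counter(void) { return hw_frame_counter; }
static int query_position(void)        { return 42; }	/* fake vpos */

int main(void)
{
	unsigned int count;
	int vpos;

	do {
		count = read_counter();
		vpos = query_position();
	} while (count != read_counter());	/* retry if it ticked */

	printf("count=%u vpos=%d\n", count, vpos);
	return 0;
}
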
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index b62c1710cab6..064ebb347074 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -407,6 +407,7 @@ struct amdgpu_crtc {
u32 line_time;
u32 wm_low;
u32 wm_high;
+ u32 lb_vblank_lead_lines;
struct drm_display_mode hw_mode;
};
@@ -528,6 +529,10 @@ struct amdgpu_framebuffer {
#define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \
((em) == ATOM_ENCODER_MODE_DP_MST))
+/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */
+#define USE_REAL_VBLANKSTART (1 << 30)
+#define GET_DISTANCE_TO_VBLANKSTART (1 << 31)
+
void amdgpu_link_encoder_connector(struct drm_device *dev);
struct drm_connector *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 0d524384ff79..c3ce103b6a33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -100,6 +100,7 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
list_del_init(&bo->list);
mutex_unlock(&bo->adev->gem.mutex);
drm_gem_object_release(&bo->gem_base);
+ amdgpu_bo_unref(&bo->parent);
kfree(bo->metadata);
kfree(bo);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 3c2ff4567798..ea756e77b023 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -189,10 +189,9 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager);
int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager);
-int amdgpu_sa_bo_new(struct amdgpu_device *adev,
- struct amdgpu_sa_manager *sa_manager,
- struct amdgpu_sa_bo **sa_bo,
- unsigned size, unsigned align);
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
+ struct amdgpu_sa_bo **sa_bo,
+ unsigned size, unsigned align);
void amdgpu_sa_bo_free(struct amdgpu_device *adev,
struct amdgpu_sa_bo **sa_bo,
struct fence *fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 0212b31dc194..8b88edb0434b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -311,8 +311,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
return false;
}
-int amdgpu_sa_bo_new(struct amdgpu_device *adev,
- struct amdgpu_sa_manager *sa_manager,
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
struct amdgpu_sa_bo **sa_bo,
unsigned size, unsigned align)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index dcf4a8aca680..438c05254695 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -26,6 +26,7 @@
#include <linux/sched.h>
#include <drm/drmP.h>
#include "amdgpu.h"
+#include "amdgpu_trace.h"
static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
{
@@ -44,11 +45,8 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
return NULL;
}
job = to_amdgpu_job(sched_job);
- mutex_lock(&job->job_lock);
- r = amdgpu_ib_schedule(job->adev,
- job->num_ibs,
- job->ibs,
- job->base.owner);
+ trace_amdgpu_sched_run_job(job);
+ r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner);
if (r) {
DRM_ERROR("Error scheduling IBs (%d)\n", r);
goto err;
@@ -61,8 +59,6 @@ err:
if (job->free_job)
job->free_job(job);
- mutex_unlock(&job->job_lock);
- fence_put(&job->base.s_fence->base);
kfree(job);
return fence ? &fence->base : NULL;
}
@@ -88,21 +84,19 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
return -ENOMEM;
job->base.sched = &ring->sched;
job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
+ job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
+ if (!job->base.s_fence) {
+ kfree(job);
+ return -ENOMEM;
+ }
+ *f = fence_get(&job->base.s_fence->base);
+
job->adev = adev;
job->ibs = ibs;
job->num_ibs = num_ibs;
- job->base.owner = owner;
- mutex_init(&job->job_lock);
+ job->owner = owner;
job->free_job = free_job;
- mutex_lock(&job->job_lock);
- r = amd_sched_entity_push_job(&job->base);
- if (r) {
- mutex_unlock(&job->job_lock);
- kfree(job);
- return r;
- }
- *f = fence_get(&job->base.s_fence->base);
- mutex_unlock(&job->job_lock);
+ amd_sched_entity_push_job(&job->base);
} else {
r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
index ff3ca52ec6fe..1caaf201b708 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
@@ -40,7 +40,7 @@ int amdgpu_semaphore_create(struct amdgpu_device *adev,
if (*semaphore == NULL) {
return -ENOMEM;
}
- r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo,
+ r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
&(*semaphore)->sa_bo, 8, 8);
if (r) {
kfree(*semaphore);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index a6697fd05217..dd005c336c97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -302,8 +302,14 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
return -EINVAL;
}
- if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores ||
- (count >= AMDGPU_NUM_SYNCS)) {
+ if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) {
+ r = fence_wait(&fence->base, true);
+ if (r)
+ return r;
+ continue;
+ }
+
+ if (count >= AMDGPU_NUM_SYNCS) {
/* not enough room, wait manually */
r = fence_wait(&fence->base, false);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 76ecbaf72a2e..8f9834ab1bd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -48,6 +48,57 @@ TRACE_EVENT(amdgpu_cs,
__entry->fences)
);
+TRACE_EVENT(amdgpu_cs_ioctl,
+ TP_PROTO(struct amdgpu_job *job),
+ TP_ARGS(job),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_device *, adev)
+ __field(struct amd_sched_job *, sched_job)
+ __field(struct amdgpu_ib *, ib)
+ __field(struct fence *, fence)
+ __field(char *, ring_name)
+ __field(u32, num_ibs)
+ ),
+
+ TP_fast_assign(
+ __entry->adev = job->adev;
+ __entry->sched_job = &job->base;
+ __entry->ib = job->ibs;
+ __entry->fence = &job->base.s_fence->base;
+ __entry->ring_name = job->ibs[0].ring->name;
+ __entry->num_ibs = job->num_ibs;
+ ),
+ TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
+ __entry->adev, __entry->sched_job, __entry->ib,
+ __entry->fence, __entry->ring_name, __entry->num_ibs)
+);
+
+TRACE_EVENT(amdgpu_sched_run_job,
+ TP_PROTO(struct amdgpu_job *job),
+ TP_ARGS(job),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_device *, adev)
+ __field(struct amd_sched_job *, sched_job)
+ __field(struct amdgpu_ib *, ib)
+ __field(struct fence *, fence)
+ __field(char *, ring_name)
+ __field(u32, num_ibs)
+ ),
+
+ TP_fast_assign(
+ __entry->adev = job->adev;
+ __entry->sched_job = &job->base;
+ __entry->ib = job->ibs;
+ __entry->fence = &job->base.s_fence->base;
+ __entry->ring_name = job->ibs[0].ring->name;
+ __entry->num_ibs = job->num_ibs;
+ ),
+ TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
+ __entry->adev, __entry->sched_job, __entry->ib,
+ __entry->fence, __entry->ring_name, __entry->num_ibs)
+);
+
+
TRACE_EVENT(amdgpu_vm_grab_id,
TP_PROTO(unsigned vmid, int ring),
TP_ARGS(vmid, ring),
@@ -196,49 +247,6 @@ TRACE_EVENT(amdgpu_bo_list_set,
TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
);
-DECLARE_EVENT_CLASS(amdgpu_fence_request,
-
- TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
- TP_ARGS(dev, ring, seqno),
-
- TP_STRUCT__entry(
- __field(u32, dev)
- __field(int, ring)
- __field(u32, seqno)
- ),
-
- TP_fast_assign(
- __entry->dev = dev->primary->index;
- __entry->ring = ring;
- __entry->seqno = seqno;
- ),
-
- TP_printk("dev=%u, ring=%d, seqno=%u",
- __entry->dev, __entry->ring, __entry->seqno)
-);
-
-DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_emit,
-
- TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
- TP_ARGS(dev, ring, seqno)
-);
-
-DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_begin,
-
- TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
- TP_ARGS(dev, ring, seqno)
-);
-
-DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_end,
-
- TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
- TP_ARGS(dev, ring, seqno)
-);
-
DECLARE_EVENT_CLASS(amdgpu_semaphore_request,
TP_PROTO(int ring, struct amdgpu_semaphore *sem),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 81bb8e9fc26d..8a1752ff3d8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -587,9 +587,13 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
int r;
- if (gtt->userptr)
- amdgpu_ttm_tt_pin_userptr(ttm);
-
+ if (gtt->userptr) {
+ r = amdgpu_ttm_tt_pin_userptr(ttm);
+ if (r) {
+ DRM_ERROR("failed to pin userptr\n");
+ return r;
+ }
+ }
gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
if (!ttm->num_pages) {
WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
@@ -797,11 +801,12 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
if (mem && mem->mem_type != TTM_PL_SYSTEM)
flags |= AMDGPU_PTE_VALID;
- if (mem && mem->mem_type == TTM_PL_TT)
+ if (mem && mem->mem_type == TTM_PL_TT) {
flags |= AMDGPU_PTE_SYSTEM;
- if (!ttm || ttm->caching_state == tt_cached)
- flags |= AMDGPU_PTE_SNOOPED;
+ if (ttm->caching_state == tt_cached)
+ flags |= AMDGPU_PTE_SNOOPED;
+ }
if (adev->asic_type >= CHIP_TOPAZ)
flags |= AMDGPU_PTE_EXECUTABLE;
@@ -1073,10 +1078,10 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
ret = drm_mm_dump_table(m, mm);
spin_unlock(&glob->lru_lock);
if (ttm_pl == TTM_PL_VRAM)
- seq_printf(m, "man size:%llu pages, ram usage:%luMB, vis usage:%luMB\n",
+ seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
adev->mman.bdev.man[ttm_pl].size,
- atomic64_read(&adev->vram_usage) >> 20,
- atomic64_read(&adev->vram_vis_usage) >> 20);
+ (u64)atomic64_read(&adev->vram_usage) >> 20,
+ (u64)atomic64_read(&adev->vram_vis_usage) >> 20);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 03f0c3bae516..a745eeeb5d82 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -392,7 +392,10 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
ib->ptr[ib->length_dw++] = handle;
- ib->ptr[ib->length_dw++] = 0x00000030; /* len */
+ if ((ring->adev->vce.fw_version >> 24) >= 52)
+ ib->ptr[ib->length_dw++] = 0x00000040; /* len */
+ else
+ ib->ptr[ib->length_dw++] = 0x00000030; /* len */
ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000042;
@@ -404,6 +407,12 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[ib->length_dw++] = 0x00000100;
ib->ptr[ib->length_dw++] = 0x0000000c;
ib->ptr[ib->length_dw++] = 0x00000000;
+ if ((ring->adev->vce.fw_version >> 24) >= 52) {
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ }
ib->ptr[ib->length_dw++] = 0x00000014; /* len */
ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 633a32a48560..b53d273eb7a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -143,10 +143,15 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
unsigned i;
/* check if the id is still valid */
- if (vm_id->id && vm_id->last_id_use &&
- vm_id->last_id_use == adev->vm_manager.active[vm_id->id]) {
- trace_amdgpu_vm_grab_id(vm_id->id, ring->idx);
- return 0;
+ if (vm_id->id) {
+ unsigned id = vm_id->id;
+ long owner;
+
+ owner = atomic_long_read(&adev->vm_manager.ids[id].owner);
+ if (owner == (long)vm) {
+ trace_amdgpu_vm_grab_id(vm_id->id, ring->idx);
+ return 0;
+ }
}
/* we definitely need to flush */
@@ -154,7 +159,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
/* skip over VMID 0, since it is the system VM */
for (i = 1; i < adev->vm_manager.nvm; ++i) {
- struct fence *fence = adev->vm_manager.active[i];
+ struct fence *fence = adev->vm_manager.ids[i].active;
struct amdgpu_ring *fring;
if (fence == NULL) {
@@ -176,7 +181,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
if (choices[i]) {
struct fence *fence;
- fence = adev->vm_manager.active[choices[i]];
+ fence = adev->vm_manager.ids[choices[i]].active;
vm_id->id = choices[i];
trace_amdgpu_vm_grab_id(choices[i], ring->idx);
@@ -207,24 +212,21 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
struct fence *flushed_updates = vm_id->flushed_updates;
- bool is_earlier = false;
+ bool is_later;
- if (flushed_updates && updates) {
- BUG_ON(flushed_updates->context != updates->context);
- is_earlier = (updates->seqno - flushed_updates->seqno <=
- INT_MAX) ? true : false;
- }
-
- if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates ||
- is_earlier) {
+ if (!flushed_updates)
+ is_later = true;
+ else if (!updates)
+ is_later = false;
+ else
+ is_later = fence_is_later(updates, flushed_updates);
+ if (pd_addr != vm_id->pd_gpu_addr || is_later) {
trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
- if (is_earlier) {
+ if (is_later) {
vm_id->flushed_updates = fence_get(updates);
fence_put(flushed_updates);
}
- if (!flushed_updates)
- vm_id->flushed_updates = fence_get(updates);
vm_id->pd_gpu_addr = pd_addr;
amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
}
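The rewritten test delegates the wraparound-safe sequence comparison to fence_is_later() instead of open-coding the subtraction against INT_MAX. Roughly, for two fences on the same timeline the helper behaves like the sketch below, which restates the removed open-coded check; the in-kernel helper additionally warns when the two fence contexts differ:

	/* Hedged sketch of the ordering test fence_is_later() replaces. */
	static inline bool seqno_is_later(u32 a, u32 b)
	{
		/* unsigned subtraction stays correct across u32 wraparound */
		return a - b < INT_MAX;
	}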
@@ -244,16 +246,14 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
*/
void amdgpu_vm_fence(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
- struct amdgpu_fence *fence)
+ struct fence *fence)
{
- unsigned ridx = fence->ring->idx;
- unsigned vm_id = vm->ids[ridx].id;
-
- fence_put(adev->vm_manager.active[vm_id]);
- adev->vm_manager.active[vm_id] = fence_get(&fence->base);
+ struct amdgpu_ring *ring = amdgpu_ring_from_fence(fence);
+ unsigned vm_id = vm->ids[ring->idx].id;
- fence_put(vm->ids[ridx].last_id_use);
- vm->ids[ridx].last_id_use = fence_get(&fence->base);
+ fence_put(adev->vm_manager.ids[vm_id].active);
+ adev->vm_manager.ids[vm_id].active = fence_get(fence);
+ atomic_long_set(&adev->vm_manager.ids[vm_id].owner, (long)vm);
}
/**
@@ -332,6 +332,8 @@ int amdgpu_vm_free_job(struct amdgpu_job *job)
*
* @adev: amdgpu_device pointer
* @bo: bo to clear
+ *
+ * The bo must be reserved before calling this function.
*/
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
struct amdgpu_bo *bo)
@@ -343,24 +345,20 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
uint64_t addr;
int r;
- r = amdgpu_bo_reserve(bo, false);
- if (r)
- return r;
-
r = reservation_object_reserve_shared(bo->tbo.resv);
if (r)
return r;
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
if (r)
- goto error_unreserve;
+ goto error;
addr = amdgpu_bo_gpu_offset(bo);
entries = amdgpu_bo_size(bo) / 8;
ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
if (!ib)
- goto error_unreserve;
+ goto error;
r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib);
if (r)
@@ -378,16 +376,14 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
if (!r)
amdgpu_bo_fence(bo, fence, true);
fence_put(fence);
- if (amdgpu_enable_scheduler) {
- amdgpu_bo_unreserve(bo);
+ if (amdgpu_enable_scheduler)
return 0;
- }
+
error_free:
amdgpu_ib_free(adev, ib);
kfree(ib);
-error_unreserve:
- amdgpu_bo_unreserve(bo);
+error:
return r;
}
@@ -889,17 +885,21 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping;
int r;
+ spin_lock(&vm->freed_lock);
while (!list_empty(&vm->freed)) {
mapping = list_first_entry(&vm->freed,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
-
+ spin_unlock(&vm->freed_lock);
r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, 0, 0, NULL);
kfree(mapping);
if (r)
return r;
+ spin_lock(&vm->freed_lock);
}
+ spin_unlock(&vm->freed_lock);
+
return 0;
}
@@ -926,8 +926,9 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
bo_va = list_first_entry(&vm->invalidated,
struct amdgpu_bo_va, vm_status);
spin_unlock(&vm->status_lock);
-
+ mutex_lock(&bo_va->mutex);
r = amdgpu_vm_bo_update(adev, bo_va, NULL);
+ mutex_unlock(&bo_va->mutex);
if (r)
return r;
@@ -971,7 +972,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
INIT_LIST_HEAD(&bo_va->valids);
INIT_LIST_HEAD(&bo_va->invalids);
INIT_LIST_HEAD(&bo_va->vm_status);
-
+ mutex_init(&bo_va->mutex);
list_add_tail(&bo_va->bo_list, &bo->va);
return bo_va;
@@ -989,7 +990,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
* Add a mapping of the BO at the specified addr into the VM.
* Returns 0 for success, error for failure.
*
- * Object has to be reserved and gets unreserved by this function!
+ * Object has to be reserved and unreserved by the caller!
*/
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
@@ -1005,30 +1006,27 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
/* validate the parameters */
if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
- size == 0 || size & AMDGPU_GPU_PAGE_MASK) {
- amdgpu_bo_unreserve(bo_va->bo);
+ size == 0 || size & AMDGPU_GPU_PAGE_MASK)
return -EINVAL;
- }
/* make sure object fit at this offset */
eaddr = saddr + size;
- if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo))) {
- amdgpu_bo_unreserve(bo_va->bo);
+ if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo)))
return -EINVAL;
- }
last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
if (last_pfn > adev->vm_manager.max_pfn) {
dev_err(adev->dev, "va above limit (0x%08X > 0x%08X)\n",
last_pfn, adev->vm_manager.max_pfn);
- amdgpu_bo_unreserve(bo_va->bo);
return -EINVAL;
}
saddr /= AMDGPU_GPU_PAGE_SIZE;
eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ spin_lock(&vm->it_lock);
it = interval_tree_iter_first(&vm->va, saddr, eaddr - 1);
+ spin_unlock(&vm->it_lock);
if (it) {
struct amdgpu_bo_va_mapping *tmp;
tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
@@ -1036,14 +1034,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
"0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr,
tmp->it.start, tmp->it.last + 1);
- amdgpu_bo_unreserve(bo_va->bo);
r = -EINVAL;
goto error;
}
mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
if (!mapping) {
- amdgpu_bo_unreserve(bo_va->bo);
r = -ENOMEM;
goto error;
}
@@ -1054,8 +1050,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
mapping->offset = offset;
mapping->flags = flags;
+ mutex_lock(&bo_va->mutex);
list_add(&mapping->list, &bo_va->invalids);
+ mutex_unlock(&bo_va->mutex);
+ spin_lock(&vm->it_lock);
interval_tree_insert(&mapping->it, &vm->va);
+ spin_unlock(&vm->it_lock);
trace_amdgpu_vm_bo_map(bo_va, mapping);
/* Make sure the page tables are allocated */
@@ -1067,8 +1067,6 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
if (eaddr > vm->max_pde_used)
vm->max_pde_used = eaddr;
- amdgpu_bo_unreserve(bo_va->bo);
-
/* walk over the address space and allocate the page tables */
for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
struct reservation_object *resv = vm->page_directory->tbo.resv;
@@ -1077,16 +1075,19 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
if (vm->page_tables[pt_idx].bo)
continue;
- ww_mutex_lock(&resv->lock, NULL);
r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
AMDGPU_GPU_PAGE_SIZE, true,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
NULL, resv, &pt);
- ww_mutex_unlock(&resv->lock);
if (r)
goto error_free;
+ /* Keep a reference to the page directory so that it is not
+ * freed before its page tables.
+ */
+ pt->parent = amdgpu_bo_ref(vm->page_directory);
+
r = amdgpu_vm_clear_bo(adev, pt);
if (r) {
amdgpu_bo_unref(&pt);
@@ -1101,7 +1102,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
error_free:
list_del(&mapping->list);
+ spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
+ spin_unlock(&vm->it_lock);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
kfree(mapping);
@@ -1119,7 +1122,7 @@ error:
* Remove a mapping of the BO at the specified addr from the VM.
* Returns 0 for success, error for failure.
*
- * Object has to be reserved and gets unreserved by this function!
+ * Object has to be reserved and unreserved by the caller!
*/
int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
@@ -1130,7 +1133,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
bool valid = true;
saddr /= AMDGPU_GPU_PAGE_SIZE;
-
+ mutex_lock(&bo_va->mutex);
list_for_each_entry(mapping, &bo_va->valids, list) {
if (mapping->it.start == saddr)
break;
@@ -1145,20 +1148,24 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
}
if (&mapping->list == &bo_va->invalids) {
- amdgpu_bo_unreserve(bo_va->bo);
+ mutex_unlock(&bo_va->mutex);
return -ENOENT;
}
}
-
+ mutex_unlock(&bo_va->mutex);
list_del(&mapping->list);
+ spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
+ spin_unlock(&vm->it_lock);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
- if (valid)
+ if (valid) {
+ spin_lock(&vm->freed_lock);
list_add(&mapping->list, &vm->freed);
- else
+ spin_unlock(&vm->freed_lock);
+ } else {
kfree(mapping);
- amdgpu_bo_unreserve(bo_va->bo);
+ }
return 0;
}
@@ -1187,17 +1194,23 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
list_del(&mapping->list);
+ spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
+ spin_unlock(&vm->it_lock);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
+ spin_lock(&vm->freed_lock);
list_add(&mapping->list, &vm->freed);
+ spin_unlock(&vm->freed_lock);
}
list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
list_del(&mapping->list);
+ spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
+ spin_unlock(&vm->it_lock);
kfree(mapping);
}
-
fence_put(bo_va->last_pt_update);
+ mutex_destroy(&bo_va->mutex);
kfree(bo_va);
}
@@ -1241,15 +1254,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
vm->ids[i].id = 0;
vm->ids[i].flushed_updates = NULL;
- vm->ids[i].last_id_use = NULL;
}
- mutex_init(&vm->mutex);
vm->va = RB_ROOT;
spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->invalidated);
INIT_LIST_HEAD(&vm->cleared);
INIT_LIST_HEAD(&vm->freed);
-
+ spin_lock_init(&vm->it_lock);
+ spin_lock_init(&vm->freed_lock);
pd_size = amdgpu_vm_directory_size(adev);
pd_entries = amdgpu_vm_num_pdes(adev);
@@ -1269,8 +1281,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
NULL, NULL, &vm->page_directory);
if (r)
return r;
-
+ r = amdgpu_bo_reserve(vm->page_directory, false);
+ if (r) {
+ amdgpu_bo_unref(&vm->page_directory);
+ vm->page_directory = NULL;
+ return r;
+ }
r = amdgpu_vm_clear_bo(adev, vm->page_directory);
+ amdgpu_bo_unreserve(vm->page_directory);
if (r) {
amdgpu_bo_unref(&vm->page_directory);
vm->page_directory = NULL;
@@ -1313,11 +1331,27 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_bo_unref(&vm->page_directory);
fence_put(vm->page_directory_fence);
-
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ unsigned id = vm->ids[i].id;
+
+ atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner,
+ (long)vm, 0);
fence_put(vm->ids[i].flushed_updates);
- fence_put(vm->ids[i].last_id_use);
}
- mutex_destroy(&vm->mutex);
+}
+
+/**
+ * amdgpu_vm_manager_fini - cleanup VM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Cleanup the VM manager and free resources.
+ */
+void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ for (i = 0; i < AMDGPU_NUM_VM; ++i)
+ fence_put(adev->vm_manager.ids[i].active);
}
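The owner field replaces the per-VM last_id_use fence: amdgpu_vm_grab_id() checks lock-free via atomic_long_read() whether a VMID still belongs to this VM, amdgpu_vm_fence() publishes ownership with atomic_long_set(), and teardown uses atomic_long_cmpxchg() so a VM only clears ownership it still holds. A condensed sketch of the protocol; the field names follow this diff, while the helper itself is hypothetical:

	/* Condensed sketch of the VMID ownership protocol added above. */
	static bool vmid_still_owned(struct amdgpu_vm_manager *mgr,
				     unsigned id, struct amdgpu_vm *vm)
	{
		/* lock-free fast path, as in amdgpu_vm_grab_id() */
		return atomic_long_read(&mgr->ids[id].owner) == (long)vm;
	}
	/* publish:  atomic_long_set(&mgr->ids[id].owner, (long)vm);
	 * teardown: atomic_long_cmpxchg(&mgr->ids[id].owner, (long)vm, 0),
	 *           which clears ownership only if this VM still holds it. */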
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index a1a35a5df8e7..57a2e347f04d 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -6569,12 +6569,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
- cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
+ cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
break;
case AMDGPU_IRQ_STATE_ENABLE:
cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
- cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
+ cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
break;
default:
@@ -6586,12 +6586,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
- cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
+ cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
break;
case AMDGPU_IRQ_STATE_ENABLE:
cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
- cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
+ cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
break;
default:
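The swapped operators above fix an inverted mask polarity: THERM_INTH_MASK and THERM_INTL_MASK are mask bits, so setting them suppresses the thermal interrupt and clearing them enables it; the old code did the opposite. The corrected mapping, as a hedged sketch with a hypothetical helper name:

	/* Hedged sketch of the corrected polarity: a set MASK bit means
	 * "interrupt masked off", so DISABLE sets it and ENABLE clears it. */
	static u32 thermal_int_apply(u32 cg_thermal_int, bool enable, u32 mask_bit)
	{
		if (enable)
			cg_thermal_int &= ~mask_bit;	/* unmask: interrupt fires */
		else
			cg_thermal_int |= mask_bit;	/* mask: interrupt blocked */
		return cg_thermal_int;
	}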
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index cb0f7747e3dc..4dcc8fba5792 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1250,7 +1250,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
u32 pixel_period;
u32 line_time = 0;
u32 latency_watermark_a = 0, latency_watermark_b = 0;
- u32 tmp, wm_mask;
+ u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
if (amdgpu_crtc->base.enabled && num_heads && mode) {
pixel_period = 1000000 / (u32)mode->clock;
@@ -1333,6 +1333,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
(adev->mode_info.disp_priority == 2)) {
DRM_DEBUG_KMS("force priority to high\n");
}
+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
}
/* select wm A */
@@ -1357,6 +1358,8 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
amdgpu_crtc->line_time = line_time;
amdgpu_crtc->wm_high = latency_watermark_a;
amdgpu_crtc->wm_low = latency_watermark_b;
+ /* Save number of lines the linebuffer leads before the scanout */
+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 5af3721851d6..8f1e51128b33 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -1238,7 +1238,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
u32 pixel_period;
u32 line_time = 0;
u32 latency_watermark_a = 0, latency_watermark_b = 0;
- u32 tmp, wm_mask;
+ u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
if (amdgpu_crtc->base.enabled && num_heads && mode) {
pixel_period = 1000000 / (u32)mode->clock;
@@ -1321,6 +1321,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
(adev->mode_info.disp_priority == 2)) {
DRM_DEBUG_KMS("force priority to high\n");
}
+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
}
/* select wm A */
@@ -1345,6 +1346,8 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
amdgpu_crtc->line_time = line_time;
amdgpu_crtc->wm_high = latency_watermark_a;
amdgpu_crtc->wm_low = latency_watermark_b;
+ /* Save number of lines the linebuffer leads before the scanout */
+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 4f7b49a6dc50..42d954dc436d 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -1193,7 +1193,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
u32 pixel_period;
u32 line_time = 0;
u32 latency_watermark_a = 0, latency_watermark_b = 0;
- u32 tmp, wm_mask;
+ u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
if (amdgpu_crtc->base.enabled && num_heads && mode) {
pixel_period = 1000000 / (u32)mode->clock;
@@ -1276,6 +1276,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
(adev->mode_info.disp_priority == 2)) {
DRM_DEBUG_KMS("force priority to high\n");
}
+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
}
/* select wm A */
@@ -1302,6 +1303,8 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
amdgpu_crtc->line_time = line_time;
amdgpu_crtc->wm_high = latency_watermark_a;
amdgpu_crtc->wm_low = latency_watermark_b;
+ /* Save number of lines the linebuffer leads before the scanout */
+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
}
/**
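DIV_ROUND_UP(lb_size, mode->crtc_hdisplay) converts the line buffer allocation into whole scanlines, i.e. how many lines the line buffer can lead the scanout. A worked sketch; the units and sample values are assumptions for illustration only:

	/* Illustrative only: lb_size units and sample values are assumptions. */
	static u32 lb_lead_lines_example(void)
	{
		u32 lb_size = 6144;	/* line buffer allocation, in pixels */
		u32 hdisplay = 1920;	/* mode->crtc_hdisplay */

		return DIV_ROUND_UP(lb_size, hdisplay);	/* (6144 + 1919) / 1920 = 4 */
	}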
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 6776cf756d40..e1dcab98e249 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -268,7 +268,6 @@ static const u32 fiji_mgcg_cgcg_init[] =
mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
- mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00600100,
mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
@@ -296,10 +295,6 @@ static const u32 fiji_mgcg_cgcg_init[] =
mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
- mmPCIE_INDEX, 0xffffffff, 0x0140001c,
- mmPCIE_DATA, 0x000f0000, 0x00000000,
- mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100,
- mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
@@ -1000,7 +995,7 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.max_cu_per_sh = 16;
adev->gfx.config.max_sh_per_se = 1;
adev->gfx.config.max_backends_per_se = 4;
- adev->gfx.config.max_texture_channel_caches = 8;
+ adev->gfx.config.max_texture_channel_caches = 16;
adev->gfx.config.max_gprs = 256;
adev->gfx.config.max_gs_threads = 32;
adev->gfx.config.max_hw_contexts = 8;
@@ -1613,6 +1608,296 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
}
case CHIP_FIJI:
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
+ switch (reg_offset) {
+ case 0:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 1:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 2:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 3:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 4:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 5:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 6:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 7:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ break;
+ case 8:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
+ break;
+ case 9:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ break;
+ case 10:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ break;
+ case 11:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ break;
+ case 12:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ break;
+ case 13:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ break;
+ case 14:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ break;
+ case 15:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ break;
+ case 16:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ break;
+ case 17:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ break;
+ case 18:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 19:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 20:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 21:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 22:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 23:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 24:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 25:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 26:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ break;
+ case 27:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ break;
+ case 28:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ break;
+ case 29:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ break;
+ case 30:
+ gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ break;
+ default:
+ gb_tile_moden = 0;
+ break;
+ }
+ adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
+ WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
+ }
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
+ switch (reg_offset) {
+ case 0:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 1:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 2:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 3:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 4:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 5:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 6:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 8:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 9:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 10:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 11:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 12:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 13:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ break;
+ case 14:
+ gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+ break;
+ case 7:
+ /* unused idx */
+ continue;
+ default:
+ gb_tile_moden = 0;
+ break;
+ }
+ adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
+ }
+ break;
case CHIP_TONGA:
for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
switch (reg_offset) {
@@ -2971,10 +3256,13 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
switch (adev->asic_type) {
case CHIP_TONGA:
- case CHIP_FIJI:
amdgpu_ring_write(ring, 0x16000012);
amdgpu_ring_write(ring, 0x0000002A);
break;
+ case CHIP_FIJI:
+ amdgpu_ring_write(ring, 0x3a00161a);
+ amdgpu_ring_write(ring, 0x0000002e);
+ break;
case CHIP_TOPAZ:
case CHIP_CARRIZO:
amdgpu_ring_write(ring, 0x00000002);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 85bbcdc73fff..ed8abb58a785 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -40,7 +40,7 @@
static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev);
static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
-MODULE_FIRMWARE("radeon/boniare_mc.bin");
+MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
/**
@@ -501,6 +501,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
WREG32(mmVM_L2_CNTL, tmp);
tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
@@ -512,7 +513,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
WREG32(mmVM_L2_CNTL3, tmp);
/* setup context0 */
WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12);
- WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, (adev->mc.gtt_end >> 12) - 1);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(adev->dummy_page.addr >> 12));
@@ -960,12 +961,10 @@ static int gmc_v7_0_sw_init(void *handle)
static int gmc_v7_0_sw_fini(void *handle)
{
- int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) {
- for (i = 0; i < AMDGPU_NUM_VM; ++i)
- fence_put(adev->vm_manager.active[i]);
+ amdgpu_vm_manager_fini(adev);
gmc_v7_0_vm_fini(adev);
adev->vm_manager.enabled = false;
}
@@ -1010,12 +1009,10 @@ static int gmc_v7_0_hw_fini(void *handle)
static int gmc_v7_0_suspend(void *handle)
{
- int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) {
- for (i = 0; i < AMDGPU_NUM_VM; ++i)
- fence_put(adev->vm_manager.active[i]);
+ amdgpu_vm_manager_fini(adev);
gmc_v7_0_vm_fini(adev);
adev->vm_manager.enabled = false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 1bcc4e74e3b4..d39028440814 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -629,6 +629,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
WREG32(mmVM_L2_CNTL, tmp);
tmp = RREG32(mmVM_L2_CNTL2);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
@@ -656,7 +657,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
WREG32(mmVM_L2_CNTL4, tmp);
/* setup context0 */
WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12);
- WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, (adev->mc.gtt_end >> 12) - 1);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(adev->dummy_page.addr >> 12));
@@ -979,12 +980,10 @@ static int gmc_v8_0_sw_init(void *handle)
static int gmc_v8_0_sw_fini(void *handle)
{
- int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) {
- for (i = 0; i < AMDGPU_NUM_VM; ++i)
- fence_put(adev->vm_manager.active[i]);
+ amdgpu_vm_manager_fini(adev);
gmc_v8_0_vm_fini(adev);
adev->vm_manager.enabled = false;
}
@@ -1031,12 +1030,10 @@ static int gmc_v8_0_hw_fini(void *handle)
static int gmc_v8_0_suspend(void *handle)
{
- int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) {
- for (i = 0; i < AMDGPU_NUM_VM; ++i)
- fence_put(adev->vm_manager.active[i]);
+ amdgpu_vm_manager_fini(adev);
gmc_v8_0_vm_fini(adev);
adev->vm_manager.enabled = false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 6a52db6ad8d7..370c6c9d81c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -40,6 +40,9 @@
#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT 0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK 0x10
+#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 0x8616
+#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 0x8617
+#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 0x8618
#define VCE_V3_0_FW_SIZE (384 * 1024)
#define VCE_V3_0_STACK_SIZE (64 * 1024)
@@ -130,9 +133,11 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
/* set BUSY flag */
WREG32_P(mmVCE_STATUS, 1, ~1);
-
- WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
- ~VCE_VCPU_CNTL__CLK_EN_MASK);
+ if (adev->asic_type >= CHIP_STONEY)
+ WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
+ else
+ WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
+ ~VCE_VCPU_CNTL__CLK_EN_MASK);
WREG32_P(mmVCE_SOFT_RESET,
VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
@@ -391,8 +396,12 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
WREG32(mmVCE_LMI_SWAP_CNTL, 0);
WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
WREG32(mmVCE_LMI_VM_CTRL, 0);
-
- WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
+ if (adev->asic_type >= CHIP_STONEY) {
+ WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
+ WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
+ WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
+ } else {
+ WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
+ }
offset = AMDGPU_VCE_FIRMWARE_OFFSET;
size = VCE_V3_0_FW_SIZE;
WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
@@ -576,6 +585,11 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
DRM_DEBUG("IH: VCE\n");
+
+ WREG32_P(mmVCE_SYS_INT_STATUS,
+ VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
+ ~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);
+
switch (entry->src_data) {
case 0:
amdgpu_fence_process(&adev->vce.ring[0]);
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
index 144f50acc971..c89dc777768f 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
@@ -16,6 +16,8 @@ TRACE_EVENT(amd_sched_job,
TP_ARGS(sched_job),
TP_STRUCT__entry(
__field(struct amd_sched_entity *, entity)
+ __field(struct amd_sched_job *, sched_job)
+ __field(struct fence *, fence)
__field(const char *, name)
__field(u32, job_count)
__field(int, hw_job_count)
@@ -23,16 +25,32 @@ TRACE_EVENT(amd_sched_job,
TP_fast_assign(
__entry->entity = sched_job->s_entity;
+ __entry->sched_job = sched_job;
+ __entry->fence = &sched_job->s_fence->base;
__entry->name = sched_job->sched->name;
__entry->job_count = kfifo_len(
&sched_job->s_entity->job_queue) / sizeof(sched_job);
__entry->hw_job_count = atomic_read(
&sched_job->sched->hw_rq_count);
),
- TP_printk("entity=%p, ring=%s, job count:%u, hw job count:%d",
- __entry->entity, __entry->name, __entry->job_count,
- __entry->hw_job_count)
+ TP_printk("entity=%p, sched job=%p, fence=%p, ring=%s, job count:%u, hw job count:%d",
+ __entry->entity, __entry->sched_job, __entry->fence, __entry->name,
+ __entry->job_count, __entry->hw_job_count)
);
+
+TRACE_EVENT(amd_sched_process_job,
+ TP_PROTO(struct amd_sched_fence *fence),
+ TP_ARGS(fence),
+ TP_STRUCT__entry(
+ __field(struct fence *, fence)
+ ),
+
+ TP_fast_assign(
+ __entry->fence = &fence->base;
+ ),
+ TP_printk("fence=%p signaled", __entry->fence)
+);
+
#endif
/* This part must be outside protection */
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 89619a5a4289..3a4820e863ec 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -30,10 +30,12 @@
#define CREATE_TRACE_POINTS
#include "gpu_sched_trace.h"
-static struct amd_sched_job *
-amd_sched_entity_pop_job(struct amd_sched_entity *entity);
+static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
+struct kmem_cache *sched_fence_slab;
+atomic_t sched_fence_slab_ref = ATOMIC_INIT(0);
+
/* Initialize a given run queue struct */
static void amd_sched_rq_init(struct amd_sched_rq *rq)
{
@@ -61,36 +63,36 @@ static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
}
/**
- * Select next job from a specified run queue with round robin policy.
- * Return NULL if nothing available.
+ * Select an entity which could provide a job to run
+ *
+ * @rq The run queue to check.
+ *
+ * Try to find a ready entity; returns NULL if none is found.
*/
-static struct amd_sched_job *
-amd_sched_rq_select_job(struct amd_sched_rq *rq)
+static struct amd_sched_entity *
+amd_sched_rq_select_entity(struct amd_sched_rq *rq)
{
struct amd_sched_entity *entity;
- struct amd_sched_job *sched_job;
spin_lock(&rq->lock);
entity = rq->current_entity;
if (entity) {
list_for_each_entry_continue(entity, &rq->entities, list) {
- sched_job = amd_sched_entity_pop_job(entity);
- if (sched_job) {
+ if (amd_sched_entity_is_ready(entity)) {
rq->current_entity = entity;
spin_unlock(&rq->lock);
- return sched_job;
+ return entity;
}
}
}
list_for_each_entry(entity, &rq->entities, list) {
- sched_job = amd_sched_entity_pop_job(entity);
- if (sched_job) {
+ if (amd_sched_entity_is_ready(entity)) {
rq->current_entity = entity;
spin_unlock(&rq->lock);
- return sched_job;
+ return entity;
}
if (entity == rq->current_entity)
@@ -174,6 +176,24 @@ static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
}
/**
+ * Check if entity is ready
+ *
+ * @entity The pointer to a valid scheduler entity
+ *
+ * Return true if entity could provide a job.
+ */
+static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity)
+{
+ if (kfifo_is_empty(&entity->job_queue))
+ return false;
+
+ if (ACCESS_ONCE(entity->dependency))
+ return false;
+
+ return true;
+}
+
+/**
* Destroy a context entity
*
* @sched Pointer to scheduler instance
@@ -208,32 +228,53 @@ static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb)
amd_sched_wakeup(entity->sched);
}
+static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
+{
+ struct amd_gpu_scheduler *sched = entity->sched;
+ struct fence * fence = entity->dependency;
+ struct amd_sched_fence *s_fence;
+
+ if (fence->context == entity->fence_context) {
+ /* We can ignore fences from ourselves */
+ fence_put(entity->dependency);
+ return false;
+ }
+
+ s_fence = to_amd_sched_fence(fence);
+ if (s_fence && s_fence->sched == sched) {
+ /* Fence is from the same scheduler */
+ if (test_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &fence->flags)) {
+ /* Ignore it when it is already scheduled */
+ fence_put(entity->dependency);
+ return false;
+ }
+
+ /* Wait for fence to be scheduled */
+ entity->cb.func = amd_sched_entity_wakeup;
+ list_add_tail(&entity->cb.node, &s_fence->scheduled_cb);
+ return true;
+ }
+
+ if (!fence_add_callback(entity->dependency, &entity->cb,
+ amd_sched_entity_wakeup))
+ return true;
+
+ fence_put(entity->dependency);
+ return false;
+}
+
static struct amd_sched_job *
amd_sched_entity_pop_job(struct amd_sched_entity *entity)
{
struct amd_gpu_scheduler *sched = entity->sched;
struct amd_sched_job *sched_job;
- if (ACCESS_ONCE(entity->dependency))
- return NULL;
-
if (!kfifo_out_peek(&entity->job_queue, &sched_job, sizeof(sched_job)))
return NULL;
- while ((entity->dependency = sched->ops->dependency(sched_job))) {
-
- if (entity->dependency->context == entity->fence_context) {
- /* We can ignore fences from ourself */
- fence_put(entity->dependency);
- continue;
- }
-
- if (fence_add_callback(entity->dependency, &entity->cb,
- amd_sched_entity_wakeup))
- fence_put(entity->dependency);
- else
+ while ((entity->dependency = sched->ops->dependency(sched_job)))
+ if (amd_sched_entity_add_dependency_cb(entity))
return NULL;
- }
return sched_job;
}
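The reworked loop above has a simple contract: amd_sched_entity_add_dependency_cb() returns true when a callback was installed and the entity must wait, and false when the dependency was resolved inline so the loop can continue. A hedged restatement with a hypothetical wrapper name, mirroring the hunk above rather than adding behaviour:

	/* Hedged restatement of the dependency loop's contract. */
	static bool dependency_blocks(struct amd_sched_entity *entity,
				      struct amd_gpu_scheduler *sched,
				      struct amd_sched_job *sched_job)
	{
		while ((entity->dependency = sched->ops->dependency(sched_job)))
			if (amd_sched_entity_add_dependency_cb(entity))
				return true;	/* callback armed; pop the job later */
		return false;			/* all dependencies already met */
	}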
@@ -247,6 +288,7 @@ amd_sched_entity_pop_job(struct amd_sched_entity *entity)
*/
static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
{
+ struct amd_gpu_scheduler *sched = sched_job->sched;
struct amd_sched_entity *entity = sched_job->s_entity;
bool added, first = false;
@@ -261,7 +303,7 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
/* first job wakes up scheduler */
if (first)
- amd_sched_wakeup(sched_job->sched);
+ amd_sched_wakeup(sched);
return added;
}
@@ -273,22 +315,13 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
*
- * Returns 0 for success, negative error code otherwise.
*/
-int amd_sched_entity_push_job(struct amd_sched_job *sched_job)
+void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
{
struct amd_sched_entity *entity = sched_job->s_entity;
- struct amd_sched_fence *fence = amd_sched_fence_create(
- entity, sched_job->owner);
-
- if (!fence)
- return -ENOMEM;
-
- fence_get(&fence->base);
- sched_job->s_fence = fence;
+ trace_amd_sched_job(sched_job);
wait_event(entity->sched->job_scheduled,
amd_sched_entity_in(sched_job));
- trace_amd_sched_job(sched_job);
- return 0;
}
/**
@@ -310,22 +343,22 @@ static void amd_sched_wakeup(struct amd_gpu_scheduler *sched)
}
/**
- * Select next to run
+ * Select next entity to process
*/
-static struct amd_sched_job *
-amd_sched_select_job(struct amd_gpu_scheduler *sched)
+static struct amd_sched_entity *
+amd_sched_select_entity(struct amd_gpu_scheduler *sched)
{
- struct amd_sched_job *sched_job;
+ struct amd_sched_entity *entity;
if (!amd_sched_ready(sched))
return NULL;
/* Kernel run queue has higher priority than normal run queue*/
- sched_job = amd_sched_rq_select_job(&sched->kernel_rq);
- if (sched_job == NULL)
- sched_job = amd_sched_rq_select_job(&sched->sched_rq);
+ entity = amd_sched_rq_select_entity(&sched->kernel_rq);
+ if (entity == NULL)
+ entity = amd_sched_rq_select_entity(&sched->sched_rq);
- return sched_job;
+ return entity;
}
static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
@@ -343,6 +376,7 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
list_del_init(&s_fence->list);
spin_unlock_irqrestore(&sched->fence_list_lock, flags);
}
+ trace_amd_sched_process_job(s_fence);
fence_put(&s_fence->base);
wake_up_interruptible(&sched->wake_up_worker);
}
@@ -386,13 +420,16 @@ static int amd_sched_main(void *param)
unsigned long flags;
wait_event_interruptible(sched->wake_up_worker,
- kthread_should_stop() ||
- (sched_job = amd_sched_select_job(sched)));
+ (entity = amd_sched_select_entity(sched)) ||
+ kthread_should_stop());
+ if (!entity)
+ continue;
+
+ sched_job = amd_sched_entity_pop_job(entity);
if (!sched_job)
continue;
- entity = sched_job->s_entity;
s_fence = sched_job->s_fence;
if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
@@ -405,6 +442,7 @@ static int amd_sched_main(void *param)
atomic_inc(&sched->hw_rq_count);
fence = sched->ops->run_job(sched_job);
+ amd_sched_fence_scheduled(s_fence);
if (fence) {
r = fence_add_callback(fence, &s_fence->cb,
amd_sched_process_job);
@@ -450,6 +488,13 @@ int amd_sched_init(struct amd_gpu_scheduler *sched,
init_waitqueue_head(&sched->wake_up_worker);
init_waitqueue_head(&sched->job_scheduled);
atomic_set(&sched->hw_rq_count, 0);
+ if (atomic_inc_return(&sched_fence_slab_ref) == 1) {
+ sched_fence_slab = kmem_cache_create(
+ "amd_sched_fence", sizeof(struct amd_sched_fence), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!sched_fence_slab)
+ return -ENOMEM;
+ }
/* Each scheduler will run on a separate kernel thread */
sched->thread = kthread_run(amd_sched_main, sched, sched->name);
@@ -470,4 +515,6 @@ void amd_sched_fini(struct amd_gpu_scheduler *sched)
{
if (sched->thread)
kthread_stop(sched->thread);
+ if (atomic_dec_and_test(&sched_fence_slab_ref))
+ kmem_cache_destroy(sched_fence_slab);
}
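The slab bookkeeping added here is a refcounted global: the first scheduler instance creates the amd_sched_fence cache and the last one to finish tears it down, keyed off an atomic counter. A stripped-down sketch of the pairing, with hypothetical helper names and error handling trimmed; it mirrors the hunks above rather than adding behaviour:

	/* Stripped-down sketch of the shared-slab lifetime used above. */
	static int sched_fence_slab_get(void)
	{
		if (atomic_inc_return(&sched_fence_slab_ref) == 1) {
			sched_fence_slab = kmem_cache_create("amd_sched_fence",
					sizeof(struct amd_sched_fence), 0,
					SLAB_HWCACHE_ALIGN, NULL);
			if (!sched_fence_slab)
				return -ENOMEM;
		}
		return 0;
	}

	static void sched_fence_slab_put(void)
	{
		if (atomic_dec_and_test(&sched_fence_slab_ref))
			kmem_cache_destroy(sched_fence_slab);
	}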
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 929e9aced041..a0f0ae53aacd 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -27,9 +27,14 @@
#include <linux/kfifo.h>
#include <linux/fence.h>
+#define AMD_SCHED_FENCE_SCHEDULED_BIT FENCE_FLAG_USER_BITS
+
struct amd_gpu_scheduler;
struct amd_sched_rq;
+extern struct kmem_cache *sched_fence_slab;
+extern atomic_t sched_fence_slab_ref;
+
/**
* A scheduler entity is a wrapper around a job queue or a group
* of other entities. Entities take turns emitting jobs from their
@@ -65,6 +70,7 @@ struct amd_sched_rq {
struct amd_sched_fence {
struct fence base;
struct fence_cb cb;
+ struct list_head scheduled_cb;
struct amd_gpu_scheduler *sched;
spinlock_t lock;
void *owner;
@@ -76,7 +82,6 @@ struct amd_sched_job {
struct amd_gpu_scheduler *sched;
struct amd_sched_entity *s_entity;
struct amd_sched_fence *s_fence;
- void *owner;
};
extern const struct fence_ops amd_sched_fence_ops;
@@ -128,11 +133,11 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
uint32_t jobs);
void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity);
-int amd_sched_entity_push_job(struct amd_sched_job *sched_job);
+void amd_sched_entity_push_job(struct amd_sched_job *sched_job);
struct amd_sched_fence *amd_sched_fence_create(
struct amd_sched_entity *s_entity, void *owner);
+void amd_sched_fence_scheduled(struct amd_sched_fence *fence);
void amd_sched_fence_signal(struct amd_sched_fence *fence);
-
#endif
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
index d802638094f4..87c78eecea64 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
@@ -32,9 +32,11 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity
struct amd_sched_fence *fence = NULL;
unsigned seq;
- fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL);
+ fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL);
if (fence == NULL)
return NULL;
+
+ INIT_LIST_HEAD(&fence->scheduled_cb);
fence->owner = owner;
fence->sched = s_entity->sched;
spin_lock_init(&fence->lock);
@@ -55,6 +57,17 @@ void amd_sched_fence_signal(struct amd_sched_fence *fence)
FENCE_TRACE(&fence->base, "was already signaled\n");
}
+void amd_sched_fence_scheduled(struct amd_sched_fence *s_fence)
+{
+ struct fence_cb *cur, *tmp;
+
+ set_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &s_fence->base.flags);
+ list_for_each_entry_safe(cur, tmp, &s_fence->scheduled_cb, node) {
+ list_del_init(&cur->node);
+ cur->func(&s_fence->base, cur);
+ }
+}
+
static const char *amd_sched_fence_get_driver_name(struct fence *fence)
{
return "amd_sched";
@@ -71,11 +84,17 @@ static bool amd_sched_fence_enable_signaling(struct fence *f)
return true;
}
+static void amd_sched_fence_release(struct fence *f)
+{
+ struct amd_sched_fence *fence = to_amd_sched_fence(f);
+ kmem_cache_free(sched_fence_slab, fence);
+}
+
const struct fence_ops amd_sched_fence_ops = {
.get_driver_name = amd_sched_fence_get_driver_name,
.get_timeline_name = amd_sched_fence_get_timeline_name,
.enable_signaling = amd_sched_fence_enable_signaling,
.signaled = NULL,
.wait = fence_default_wait,
- .release = NULL,
+ .release = amd_sched_fence_release,
};
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 7bb3845d9974..aeee083c7f95 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1432,6 +1432,45 @@ static int atomic_set_prop(struct drm_atomic_state *state,
return ret;
}
+/**
+ * drm_atomic_clean_old_fb - Unset old_fb pointers and set plane->fb pointers.
+ *
+ * @dev: drm device to check.
+ * @plane_mask: plane mask for planes that were updated.
+ * @ret: return value, can be -EDEADLK for a retry.
+ *
+ * Before doing an update plane->old_fb is set to plane->fb,
+ * but before dropping the locks old_fb needs to be set to NULL
+ * and plane->fb updated. This is a common operation for each
+ * atomic update, so this call is split off as a helper.
+ */
+void drm_atomic_clean_old_fb(struct drm_device *dev,
+ unsigned plane_mask,
+ int ret)
+{
+ struct drm_plane *plane;
+
+ /* if succeeded, fixup legacy plane crtc/fb ptrs before dropping
+ * locks (ie. while it is still safe to deref plane->state). We
+ * need to do this here because the driver entry points cannot
+ * distinguish between legacy and atomic ioctls.
+ */
+ drm_for_each_plane_mask(plane, dev, plane_mask) {
+ if (ret == 0) {
+ struct drm_framebuffer *new_fb = plane->state->fb;
+ if (new_fb)
+ drm_framebuffer_reference(new_fb);
+ plane->fb = new_fb;
+ plane->crtc = plane->state->crtc;
+
+ if (plane->old_fb)
+ drm_framebuffer_unreference(plane->old_fb);
+ }
+ plane->old_fb = NULL;
+ }
+}
+EXPORT_SYMBOL(drm_atomic_clean_old_fb);
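Callers of the new helper follow the pattern visible in the fb-helper hunks later in this diff: record plane->old_fb and the plane's bit in plane_mask before committing, then hand both plus the commit result to drm_atomic_clean_old_fb(). A condensed usage sketch; the wrapper function is hypothetical, the APIs are the ones used in this diff:

	/* Condensed usage sketch, mirroring restore_fbdev_mode_atomic() below. */
	static int commit_and_clean(struct drm_device *dev,
				    struct drm_atomic_state *state)
	{
		struct drm_plane *plane;
		unsigned plane_mask = 0;
		int ret;

		drm_for_each_plane(plane, dev) {
			plane->old_fb = plane->fb;	/* remember pre-commit fb */
			plane_mask |= 1 << drm_plane_index(plane);
		}

		ret = drm_atomic_commit(state);

		/* on success swaps plane->fb, on failure (including -EDEADLK
		 * before a retry) it just clears the old_fb pointers */
		drm_atomic_clean_old_fb(dev, plane_mask, ret);
		return ret;
	}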
+
int drm_mode_atomic_ioctl(struct drm_device *dev,
void *data, struct drm_file *file_priv)
{
@@ -1446,7 +1485,7 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
struct drm_plane *plane;
struct drm_crtc *crtc;
struct drm_crtc_state *crtc_state;
- unsigned plane_mask = 0;
+ unsigned plane_mask;
int ret = 0;
unsigned int i, j;
@@ -1486,6 +1525,7 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
state->allow_modeset = !!(arg->flags & DRM_MODE_ATOMIC_ALLOW_MODESET);
retry:
+ plane_mask = 0;
copied_objs = 0;
copied_props = 0;
@@ -1576,24 +1616,7 @@ retry:
}
out:
- /* if succeeded, fixup legacy plane crtc/fb ptrs before dropping
- * locks (ie. while it is still safe to deref plane->state). We
- * need to do this here because the driver entry points cannot
- * distinguish between legacy and atomic ioctls.
- */
- drm_for_each_plane_mask(plane, dev, plane_mask) {
- if (ret == 0) {
- struct drm_framebuffer *new_fb = plane->state->fb;
- if (new_fb)
- drm_framebuffer_reference(new_fb);
- plane->fb = new_fb;
- plane->crtc = plane->state->crtc;
-
- if (plane->old_fb)
- drm_framebuffer_unreference(plane->old_fb);
- }
- plane->old_fb = NULL;
- }
+ drm_atomic_clean_old_fb(dev, plane_mask, ret);
if (ret && arg->flags & DRM_MODE_PAGE_FLIP_EVENT) {
/*
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 0c6f62168776..e5aec45bf985 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -210,6 +210,14 @@ update_connector_routing(struct drm_atomic_state *state, int conn_idx)
return -EINVAL;
}
+ if (!drm_encoder_crtc_ok(new_encoder, connector_state->crtc)) {
+ DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] incompatible with [CRTC:%d]\n",
+ new_encoder->base.id,
+ new_encoder->name,
+ connector_state->crtc->base.id);
+ return -EINVAL;
+ }
+
if (new_encoder == connector_state->best_encoder) {
DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] keeps [ENCODER:%d:%s], now on [CRTC:%d]\n",
connector->base.id,
@@ -1553,6 +1561,9 @@ retry:
goto fail;
}
+ if (plane_state->crtc && (plane == plane->crtc->cursor))
+ plane_state->state->legacy_cursor_update = true;
+
ret = __drm_atomic_helper_disable_plane(plane, plane_state);
if (ret != 0)
goto fail;
@@ -1605,9 +1616,6 @@ int __drm_atomic_helper_disable_plane(struct drm_plane *plane,
plane_state->src_h = 0;
plane_state->src_w = 0;
- if (plane->crtc && (plane == plane->crtc->cursor))
- plane_state->state->legacy_cursor_update = true;
-
return 0;
}
@@ -1741,6 +1749,7 @@ int __drm_atomic_helper_set_config(struct drm_mode_set *set,
struct drm_crtc_state *crtc_state;
struct drm_plane_state *primary_state;
struct drm_crtc *crtc = set->crtc;
+ int hdisplay, vdisplay;
int ret;
crtc_state = drm_atomic_get_crtc_state(state, crtc);
@@ -1783,19 +1792,21 @@ int __drm_atomic_helper_set_config(struct drm_mode_set *set,
if (ret != 0)
return ret;
+ drm_crtc_get_hv_timing(set->mode, &hdisplay, &vdisplay);
+
drm_atomic_set_fb_for_plane(primary_state, set->fb);
primary_state->crtc_x = 0;
primary_state->crtc_y = 0;
- primary_state->crtc_h = set->mode->vdisplay;
- primary_state->crtc_w = set->mode->hdisplay;
+ primary_state->crtc_h = vdisplay;
+ primary_state->crtc_w = hdisplay;
primary_state->src_x = set->x << 16;
primary_state->src_y = set->y << 16;
if (primary_state->rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270))) {
- primary_state->src_h = set->mode->hdisplay << 16;
- primary_state->src_w = set->mode->vdisplay << 16;
+ primary_state->src_h = hdisplay << 16;
+ primary_state->src_w = vdisplay << 16;
} else {
- primary_state->src_h = set->mode->vdisplay << 16;
- primary_state->src_w = set->mode->hdisplay << 16;
+ primary_state->src_h = vdisplay << 16;
+ primary_state->src_w = hdisplay << 16;
}
commit:
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 9362609df38a..7dd6728dd092 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -160,6 +160,11 @@ int drm_setmaster_ioctl(struct drm_device *dev, void *data,
goto out_unlock;
}
+ if (!file_priv->allowed_master) {
+ ret = drm_new_set_master(dev, file_priv);
+ goto out_unlock;
+ }
+
file_priv->minor->master = drm_master_get(file_priv->master);
file_priv->is_master = 1;
if (dev->driver->master_set) {
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index e673c13c7391..69cbab5e5c81 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -342,6 +342,7 @@ static int restore_fbdev_mode_atomic(struct drm_fb_helper *fb_helper)
struct drm_plane *plane;
struct drm_atomic_state *state;
int i, ret;
+ unsigned plane_mask;
state = drm_atomic_state_alloc(dev);
if (!state)
@@ -349,11 +350,10 @@ static int restore_fbdev_mode_atomic(struct drm_fb_helper *fb_helper)
state->acquire_ctx = dev->mode_config.acquire_ctx;
retry:
+ plane_mask = 0;
drm_for_each_plane(plane, dev) {
struct drm_plane_state *plane_state;
- plane->old_fb = plane->fb;
-
plane_state = drm_atomic_get_plane_state(state, plane);
if (IS_ERR(plane_state)) {
ret = PTR_ERR(plane_state);
@@ -362,6 +362,9 @@ retry:
plane_state->rotation = BIT(DRM_ROTATE_0);
+ plane->old_fb = plane->fb;
+ plane_mask |= 1 << drm_plane_index(plane);
+
/* disable non-primary: */
if (plane->type == DRM_PLANE_TYPE_PRIMARY)
continue;
@@ -382,19 +385,7 @@ retry:
ret = drm_atomic_commit(state);
fail:
- drm_for_each_plane(plane, dev) {
- if (ret == 0) {
- struct drm_framebuffer *new_fb = plane->state->fb;
- if (new_fb)
- drm_framebuffer_reference(new_fb);
- plane->fb = new_fb;
- plane->crtc = plane->state->crtc;
-
- if (plane->old_fb)
- drm_framebuffer_unreference(plane->old_fb);
- }
- plane->old_fb = NULL;
- }
+ drm_atomic_clean_old_fb(dev, plane_mask, ret);
if (ret == -EDEADLK)
goto backoff;
@@ -1236,7 +1227,9 @@ static int pan_display_atomic(struct fb_var_screeninfo *var,
struct drm_fb_helper *fb_helper = info->par;
struct drm_device *dev = fb_helper->dev;
struct drm_atomic_state *state;
+ struct drm_plane *plane;
int i, ret;
+ unsigned plane_mask;
state = drm_atomic_state_alloc(dev);
if (!state)
@@ -1244,19 +1237,22 @@ static int pan_display_atomic(struct fb_var_screeninfo *var,
state->acquire_ctx = dev->mode_config.acquire_ctx;
retry:
+ plane_mask = 0;
for(i = 0; i < fb_helper->crtc_count; i++) {
struct drm_mode_set *mode_set;
mode_set = &fb_helper->crtc_info[i].mode_set;
- mode_set->crtc->primary->old_fb = mode_set->crtc->primary->fb;
-
mode_set->x = var->xoffset;
mode_set->y = var->yoffset;
ret = __drm_atomic_helper_set_config(mode_set, state);
if (ret != 0)
goto fail;
+
+ plane = mode_set->crtc->primary;
+ plane_mask |= 1 << drm_plane_index(plane);
+ plane->old_fb = plane->fb;
}
ret = drm_atomic_commit(state);
@@ -1268,26 +1264,7 @@ retry:
fail:
- for(i = 0; i < fb_helper->crtc_count; i++) {
- struct drm_mode_set *mode_set;
- struct drm_plane *plane;
-
- mode_set = &fb_helper->crtc_info[i].mode_set;
- plane = mode_set->crtc->primary;
-
- if (ret == 0) {
- struct drm_framebuffer *new_fb = plane->state->fb;
-
- if (new_fb)
- drm_framebuffer_reference(new_fb);
- plane->fb = new_fb;
- plane->crtc = plane->state->crtc;
-
- if (plane->old_fb)
- drm_framebuffer_unreference(plane->old_fb);
- }
- plane->old_fb = NULL;
- }
+ drm_atomic_clean_old_fb(dev, plane_mask, ret);
if (ret == -EDEADLK)
goto backoff;
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index c59ce4d0ef75..6b5625e66119 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -126,6 +126,60 @@ static int drm_cpu_valid(void)
}
/**
+ * drm_new_set_master - Allocate a new master object and become master for the
+ * associated master realm.
+ *
+ * @dev: The associated device.
+ * @fpriv: File private identifying the client.
+ *
+ * This function must be called with dev->master_mutex held.
+ * Returns negative error code on failure. Zero on success.
+ */
+int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv)
+{
+ struct drm_master *old_master;
+ int ret;
+
+ lockdep_assert_held_once(&dev->master_mutex);
+
+ /* create a new master */
+ fpriv->minor->master = drm_master_create(fpriv->minor);
+ if (!fpriv->minor->master)
+ return -ENOMEM;
+
+ /* take another reference for the copy in the local file priv */
+ old_master = fpriv->master;
+ fpriv->master = drm_master_get(fpriv->minor->master);
+
+ if (dev->driver->master_create) {
+ ret = dev->driver->master_create(dev, fpriv->master);
+ if (ret)
+ goto out_err;
+ }
+ if (dev->driver->master_set) {
+ ret = dev->driver->master_set(dev, fpriv, true);
+ if (ret)
+ goto out_err;
+ }
+
+ fpriv->is_master = 1;
+ fpriv->allowed_master = 1;
+ fpriv->authenticated = 1;
+ if (old_master)
+ drm_master_put(&old_master);
+
+ return 0;
+
+out_err:
+ /* drop both references and restore old master on failure */
+ drm_master_put(&fpriv->minor->master);
+ drm_master_put(&fpriv->master);
+ fpriv->master = old_master;
+
+ return ret;
+}
+
+/**
* Called whenever a process opens /dev/drm.
*
* \param filp file pointer.
@@ -189,35 +243,9 @@ static int drm_open_helper(struct file *filp, struct drm_minor *minor)
mutex_lock(&dev->master_mutex);
if (drm_is_primary_client(priv) && !priv->minor->master) {
/* create a new master */
- priv->minor->master = drm_master_create(priv->minor);
- if (!priv->minor->master) {
- ret = -ENOMEM;
+ ret = drm_new_set_master(dev, priv);
+ if (ret)
goto out_close;
- }
-
- priv->is_master = 1;
- /* take another reference for the copy in the local file priv */
- priv->master = drm_master_get(priv->minor->master);
- priv->authenticated = 1;
-
- if (dev->driver->master_create) {
- ret = dev->driver->master_create(dev, priv->master);
- if (ret) {
- /* drop both references if this fails */
- drm_master_put(&priv->minor->master);
- drm_master_put(&priv->master);
- goto out_close;
- }
- }
- if (dev->driver->master_set) {
- ret = dev->driver->master_set(dev, priv, true);
- if (ret) {
- /* drop both references if this fails */
- drm_master_put(&priv->minor->master);
- drm_master_put(&priv->master);
- goto out_close;
- }
- }
} else if (drm_is_primary_client(priv)) {
/* get a reference to the master */
priv->master = drm_master_get(priv->minor->master);
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 2151ea551d3b..607f493ae801 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -980,7 +980,8 @@ static void send_vblank_event(struct drm_device *dev,
struct drm_pending_vblank_event *e,
unsigned long seq, struct timeval *now)
{
- WARN_ON_SMP(!spin_is_locked(&dev->event_lock));
+ assert_spin_locked(&dev->event_lock);
+
e->event.sequence = seq;
e->event.tv_sec = now->tv_sec;
e->event.tv_usec = now->tv_usec;
@@ -993,6 +994,57 @@ static void send_vblank_event(struct drm_device *dev,
}
/**
+ * drm_arm_vblank_event - arm vblank event after pageflip
+ * @dev: DRM device
+ * @pipe: CRTC index
+ * @e: the event to prepare to send
+ *
+ * A lot of drivers need to generate vblank events for the very next vblank
+ * interrupt. For example, the page flip interrupt fires when the flip gets
+ * armed, but the flip only takes effect during the following vblank period.
+ * This helper function implements exactly the required vblank arming
+ * behaviour.
+ *
+ * Caller must hold event lock. Caller must also hold a vblank reference for
+ * the event @e, which will be dropped when the next vblank arrives.
+ *
+ * This is the legacy version of drm_crtc_arm_vblank_event().
+ */
+void drm_arm_vblank_event(struct drm_device *dev, unsigned int pipe,
+ struct drm_pending_vblank_event *e)
+{
+ assert_spin_locked(&dev->event_lock);
+
+ e->pipe = pipe;
+ e->event.sequence = drm_vblank_count(dev, pipe);
+ list_add_tail(&e->base.link, &dev->vblank_event_list);
+}
+EXPORT_SYMBOL(drm_arm_vblank_event);
+
+/**
+ * drm_crtc_arm_vblank_event - arm vblank event after pageflip
+ * @crtc: the source CRTC of the vblank event
+ * @e: the event to send
+ *
+ * A lot of drivers need to generate vblank events for the very next vblank
+ * interrupt. For example, the page flip interrupt fires when the flip gets
+ * armed, but the flip only takes effect during the following vblank period.
+ * This helper function implements exactly the required vblank arming
+ * behaviour.
+ *
+ * Caller must hold event lock. Caller must also hold a vblank reference for
+ * the event @e, which will be dropped when the next vblank arrives.
+ *
+ * This is the native KMS version of drm_arm_vblank_event().
+ */
+void drm_crtc_arm_vblank_event(struct drm_crtc *crtc,
+ struct drm_pending_vblank_event *e)
+{
+ drm_arm_vblank_event(crtc->dev, drm_crtc_index(crtc), e);
+}
+EXPORT_SYMBOL(drm_crtc_arm_vblank_event);
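A hedged usage sketch for the new helper, in the shape of a driver's flip-done interrupt path; the foo_* names are illustrative and not from this patch:

    /* Illustrative only: arm the pending event from the flip handler. */
    static void foo_flip_done(struct foo_crtc *fc)
    {
            struct drm_crtc *crtc = &fc->base;
            unsigned long flags;

            spin_lock_irqsave(&crtc->dev->event_lock, flags);
            if (fc->event) {
                    /* vblank reference was taken when the flip was queued */
                    drm_crtc_arm_vblank_event(crtc, fc->event);
                    fc->event = NULL;
            }
            spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
    }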
+
+/**
* drm_send_vblank_event - helper to send vblank event after pageflip
* @dev: DRM device
* @pipe: CRTC index
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index a18164f2f6d2..f8b5fcfa91a2 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -229,7 +229,8 @@ static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connect
mode_flags |= DRM_MODE_FLAG_3D_MASK;
list_for_each_entry(mode, &connector->modes, head) {
- mode->status = drm_mode_validate_basic(mode);
+ if (mode->status == MODE_OK)
+ mode->status = drm_mode_validate_basic(mode);
if (mode->status == MODE_OK)
mode->status = drm_mode_validate_size(mode, maxX, maxY);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
index b3ba27fd9a6b..e69357172ffb 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -55,6 +55,9 @@ static int exynos_crtc_atomic_check(struct drm_crtc *crtc,
{
struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
+ if (!state->enable)
+ return 0;
+
if (exynos_crtc->ops->atomic_check)
return exynos_crtc->ops->atomic_check(exynos_crtc, state);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a3b22bdacd44..8aab974b0564 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2734,6 +2734,8 @@ static const char *power_domain_str(enum intel_display_power_domain domain)
return "AUX_C";
case POWER_DOMAIN_AUX_D:
return "AUX_D";
+ case POWER_DOMAIN_GMBUS:
+ return "GMBUS";
case POWER_DOMAIN_INIT:
return "INIT";
default:
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8afda459a26e..f4af19a0d569 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -199,6 +199,7 @@ enum intel_display_power_domain {
POWER_DOMAIN_AUX_B,
POWER_DOMAIN_AUX_C,
POWER_DOMAIN_AUX_D,
+ POWER_DOMAIN_GMBUS,
POWER_DOMAIN_INIT,
POWER_DOMAIN_NUM,
@@ -351,6 +352,8 @@ enum intel_dpll_id {
/* hsw/bdw */
DPLL_ID_WRPLL1 = 0,
DPLL_ID_WRPLL2 = 1,
+ DPLL_ID_SPLL = 2,
+
/* skl */
DPLL_ID_SKL_DPLL1 = 0,
DPLL_ID_SKL_DPLL2 = 1,
@@ -367,6 +370,7 @@ struct intel_dpll_hw_state {
/* hsw, bdw */
uint32_t wrpll;
+ uint32_t spll;
/* skl */
/*
@@ -2189,8 +2193,17 @@ struct drm_i915_gem_request {
struct drm_i915_private *i915;
struct intel_engine_cs *ring;
- /** GEM sequence number associated with this request. */
- uint32_t seqno;
+ /** GEM sequence number associated with the previous request;
+ * once the HWS breadcrumb is equal to this, the GPU is processing
+ * this request.
+ */
+ u32 previous_seqno;
+
+ /** GEM sequence number associated with this request;
+ * once the HWS breadcrumb is equal to or greater than this, the GPU
+ * has finished processing this request.
+ */
+ u32 seqno;
/** Position in the ringbuffer of the start of the request */
u32 head;
@@ -2648,6 +2661,7 @@ struct i915_params {
int enable_cmd_parser;
/* leave bools at the end to not create holes */
bool enable_hangcheck;
+ bool fastboot;
bool prefault_disable;
bool load_detect_test;
bool reset;
@@ -2834,6 +2848,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
u32 flags);
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
int __must_check i915_vma_unbind(struct i915_vma *vma);
/*
* BEWARE: Do not use the function below unless you can _absolutely_
@@ -2905,15 +2920,17 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
return (int32_t)(seq1 - seq2) >= 0;
}
+static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
+ bool lazy_coherency)
+{
+ u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
+ return i915_seqno_passed(seqno, req->previous_seqno);
+}
+
static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
bool lazy_coherency)
{
- u32 seqno;
-
- BUG_ON(req == NULL);
-
- seqno = req->ring->get_seqno(req->ring, lazy_coherency);
-
+ u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
return i915_seqno_passed(seqno, req->seqno);
}
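The i915_seqno_passed() comparison these helpers rely on is wraparound-safe; a quick worked example with assumed values:

    /* u32 arithmetic: 0x00000002 - 0xfffffffe == 4, so the cast to
     * int32_t stays positive and "passed" is reported correctly even
     * though the counter wrapped.
     */
    u32 hws = 0x00000002, seqno = 0xfffffffe;
    bool passed = (int32_t)(hws - seqno) >= 0;	/* true */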
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5cf4a1998273..f56af0aaafde 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1146,23 +1146,74 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
}
-static int __i915_spin_request(struct drm_i915_gem_request *req)
+static unsigned long local_clock_us(unsigned *cpu)
+{
+ unsigned long t;
+
+ /* Cheaply and approximately convert from nanoseconds to microseconds.
+ * The result and subsequent calculations are also defined in the same
+ * approximate microseconds units. The principal source of timing
+ * error here is from the simple truncation.
+ *
+ * Note that local_clock() is only defined for the current CPU;
+ * the comparisons are no longer valid if we switch CPUs. Instead of
+ * blocking preemption for the entire busywait, we can detect the CPU
+ * switch and use that as indicator of system load and a reason to
+ * stop busywaiting, see busywait_stop().
+ */
+ *cpu = get_cpu();
+ t = local_clock() >> 10;
+ put_cpu();
+
+ return t;
+}
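A quick check of the >>10 shortcut above, with assumed numbers: dividing by 1024 instead of 1000 undershoots by roughly 2.3%, which is acceptable for a spin budget.

    unsigned long ns = 10000;		/* 10 us of wall-clock time */
    unsigned long approx_us = ns >> 10;	/* 9: /1024 error plus truncation */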
+
+static bool busywait_stop(unsigned long timeout, unsigned cpu)
+{
+ unsigned this_cpu;
+
+ if (time_after(local_clock_us(&this_cpu), timeout))
+ return true;
+
+ return this_cpu != cpu;
+}
+
+static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
{
unsigned long timeout;
+ unsigned cpu;
+
+ /* When waiting for high frequency requests, e.g. during synchronous
+ * rendering split between the CPU and GPU, the finite amount of time
+ * required to set up the irq and wait upon it limits the response
+ * rate. By busywaiting on the request completion for a short while we
+ * can service the high frequency waits as quickly as possible. However,
+ * if it is a slow request, we want to sleep as quickly as possible.
+ * The tradeoff between waiting and sleeping is roughly the time it
+ * takes to sleep on a request, on the order of a microsecond.
+ */
- if (i915_gem_request_get_ring(req)->irq_refcount)
+ if (req->ring->irq_refcount)
return -EBUSY;
- timeout = jiffies + 1;
+ /* Only spin if we know the GPU is processing this request */
+ if (!i915_gem_request_started(req, true))
+ return -EAGAIN;
+
+ timeout = local_clock_us(&cpu) + 5;
while (!need_resched()) {
if (i915_gem_request_completed(req, true))
return 0;
- if (time_after_eq(jiffies, timeout))
+ if (signal_pending_state(state, current))
+ break;
+
+ if (busywait_stop(timeout, cpu))
break;
cpu_relax_lowlatency();
}
+
if (i915_gem_request_completed(req, false))
return 0;
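For context, signal_pending_state() used above behaves roughly as sketched below (core semantics, not necessarily the verbatim kernel source): an uninterruptible waiter ignores ordinary signals, while an interruptible one breaks out for any pending signal.

    static inline int signal_pending_state(long state, struct task_struct *p)
    {
            if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
                    return 0;
            if (!signal_pending(p))
                    return 0;

            return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
    }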
@@ -1197,6 +1248,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
struct drm_i915_private *dev_priv = dev->dev_private;
const bool irq_test_in_progress =
ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
+ int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
DEFINE_WAIT(wait);
unsigned long timeout_expire;
s64 before, now;
@@ -1210,8 +1262,16 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
if (i915_gem_request_completed(req, true))
return 0;
- timeout_expire = timeout ?
- jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
+ timeout_expire = 0;
+ if (timeout) {
+ if (WARN_ON(*timeout < 0))
+ return -EINVAL;
+
+ if (*timeout == 0)
+ return -ETIME;
+
+ timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
+ }
if (INTEL_INFO(dev_priv)->gen >= 6)
gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
@@ -1221,7 +1281,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
before = ktime_get_raw_ns();
/* Optimistic spin for the next jiffie before touching IRQs */
- ret = __i915_spin_request(req);
+ ret = __i915_spin_request(req, state);
if (ret == 0)
goto out;
@@ -1233,8 +1293,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
for (;;) {
struct timer_list timer;
- prepare_to_wait(&ring->irq_queue, &wait,
- interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(&ring->irq_queue, &wait, state);
/* We need to check whether any gpu reset happened in between
* the caller grabbing the seqno and now ... */
@@ -1252,7 +1311,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
break;
}
- if (interruptible && signal_pending(current)) {
+ if (signal_pending_state(state, current)) {
ret = -ERESTARTSYS;
break;
}
@@ -2546,6 +2605,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
request->batch_obj = obj;
request->emitted_jiffies = jiffies;
+ request->previous_seqno = ring->last_submitted_seqno;
ring->last_submitted_seqno = request->seqno;
list_add_tail(&request->list, &ring->request_list);
@@ -3809,6 +3869,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
+ struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_caching *args = data;
struct drm_i915_gem_object *obj;
enum i915_cache_level level;
@@ -3837,9 +3898,11 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
+ intel_runtime_pm_get(dev_priv);
+
ret = i915_mutex_lock_interruptible(dev);
if (ret)
- return ret;
+ goto rpm_put;
obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
if (&obj->base == NULL) {
@@ -3852,6 +3915,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
drm_gem_object_unreference(&obj->base);
unlock:
mutex_unlock(&dev->struct_mutex);
+rpm_put:
+ intel_runtime_pm_put(dev_priv);
+
return ret;
}
@@ -4066,6 +4132,29 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
return false;
}
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
+{
+ struct drm_i915_gem_object *obj = vma->obj;
+ bool mappable, fenceable;
+ u32 fence_size, fence_alignment;
+
+ fence_size = i915_gem_get_gtt_size(obj->base.dev,
+ obj->base.size,
+ obj->tiling_mode);
+ fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
+ obj->base.size,
+ obj->tiling_mode,
+ true);
+
+ fenceable = (vma->node.size == fence_size &&
+ (vma->node.start & (fence_alignment - 1)) == 0);
+
+ mappable = (vma->node.start + fence_size <=
+ to_i915(obj->base.dev)->gtt.mappable_end);
+
+ obj->map_and_fenceable = mappable && fenceable;
+}
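A worked example of the two predicates, with hypothetical numbers (1 MiB object, 256 KiB fence alignment, 256 MiB mappable aperture):

    u64 start = 0x00140000, size = SZ_1M;	/* node placement, made up */

    bool fenceable = (size == SZ_1M) &&			/* covers fence size */
                     ((start & (SZ_256K - 1)) == 0);	/* 256 KiB aligned */
    bool mappable  = start + SZ_1M <= SZ_256M;		/* below mappable_end */
    /* map_and_fenceable = mappable && fenceable -> true here */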
+
static int
i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
@@ -4133,25 +4222,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
(bound ^ vma->bound) & GLOBAL_BIND) {
- bool mappable, fenceable;
- u32 fence_size, fence_alignment;
-
- fence_size = i915_gem_get_gtt_size(obj->base.dev,
- obj->base.size,
- obj->tiling_mode);
- fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
- obj->base.size,
- obj->tiling_mode,
- true);
-
- fenceable = (vma->node.size == fence_size &&
- (vma->node.start & (fence_alignment - 1)) == 0);
-
- mappable = (vma->node.start + fence_size <=
- dev_priv->gtt.mappable_end);
-
- obj->map_and_fenceable = mappable && fenceable;
-
+ __i915_vma_set_map_and_fenceable(vma);
WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8c688a5f1589..02ceb7a4b481 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -141,8 +141,6 @@ static void i915_gem_context_clean(struct intel_context *ctx)
if (!ppgtt)
return;
- WARN_ON(!list_empty(&ppgtt->base.active_list));
-
list_for_each_entry_safe(vma, next, &ppgtt->base.inactive_list,
mm_list) {
if (WARN_ON(__i915_vma_unbind_no_wait(vma)))
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index 40a10b25956c..f010391b87f5 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -642,11 +642,10 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
}
/* check for L-shaped memory aka modified enhanced addressing */
- if (IS_GEN4(dev)) {
- uint32_t ddc2 = I915_READ(DCC2);
-
- if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE))
- dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
+ if (IS_GEN4(dev) &&
+ !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
+ swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+ swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
}
if (dcc == 0xffffffff) {
@@ -675,16 +674,35 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
* matching, which was the case for the swizzling required in
* the table above, or from the 1-ch value being less than
* the minimum size of a rank.
+ *
+ * Reports indicate that the swizzling actually
+ * varies depending upon page placement inside the
+ * channels, i.e. we see swizzled pages where the
+ * banks of memory are paired and unswizzled on the
+ * uneven portion, so leave that as unknown.
*/
- if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) {
- swizzle_x = I915_BIT_6_SWIZZLE_NONE;
- swizzle_y = I915_BIT_6_SWIZZLE_NONE;
- } else {
+ if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
swizzle_x = I915_BIT_6_SWIZZLE_9_10;
swizzle_y = I915_BIT_6_SWIZZLE_9;
}
}
+ if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
+ swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
+ /* Userspace likes to explode if it sees unknown swizzling,
+ * so lie. We will finish the lie when reporting through
+ * the get-tiling-ioctl by reporting the physical swizzle
+ * mode as unknown instead.
+ *
+ * As we don't strictly know what the swizzling is, it may be
+ * bit17 dependent, and so we need to also prevent the pages
+ * from being moved.
+ */
+ dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
+ swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+ swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+ }
+
dev_priv->mm.bit_6_swizzle_x = swizzle_x;
dev_priv->mm.bit_6_swizzle_y = swizzle_y;
}
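For reference, a sketch of what a bit-6 swizzle mode means for a byte address; the XOR-of-address-bits form below is the commonly documented behaviour and is an assumption here, not code from this patch:

    /* BIT_6_SWIZZLE_9_10: bit 6 of the byte address is flipped based on
     * the XOR of address bits 9 and 10. If bit 17 also participates, the
     * swizzle depends on physical page placement, which is why "unknown"
     * pages must stay pinned in place (QUIRK_PIN_SWIZZLED_PAGES).
     */
    static u32 swizzle_addr_9_10(u32 addr)
    {
            return addr ^ ((((addr >> 9) ^ (addr >> 10)) & 1) << 6);
    }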
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 43f35d12b677..86c7500454b4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2676,6 +2676,7 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
return ret;
}
vma->bound |= GLOBAL_BIND;
+ __i915_vma_set_map_and_fenceable(vma);
list_add_tail(&vma->mm_list, &ggtt_vm->inactive_list);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index cdacf3f5b77a..87e919a06b27 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -687,6 +687,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
}
vma->bound |= GLOBAL_BIND;
+ __i915_vma_set_map_and_fenceable(vma);
list_add_tail(&vma->mm_list, &ggtt->inactive_list);
}
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 96bb23865eac..4be13a5eb932 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -40,6 +40,7 @@ struct i915_params i915 __read_mostly = {
.preliminary_hw_support = IS_ENABLED(CONFIG_DRM_I915_PRELIMINARY_HW_SUPPORT),
.disable_power_well = -1,
.enable_ips = 1,
+ .fastboot = 0,
.prefault_disable = 0,
.load_detect_test = 0,
.reset = true,
@@ -133,6 +134,10 @@ MODULE_PARM_DESC(disable_power_well,
module_param_named_unsafe(enable_ips, i915.enable_ips, int, 0600);
MODULE_PARM_DESC(enable_ips, "Enable IPS (default: true)");
+module_param_named(fastboot, i915.fastboot, bool, 0600);
+MODULE_PARM_DESC(fastboot,
+ "Try to skip unnecessary mode sets at boot time (default: false)");
+
module_param_named_unsafe(prefault_disable, i915.prefault_disable, bool, 0600);
MODULE_PARM_DESC(prefault_disable,
"Disable page prefaulting for pread/pwrite/reloc (default:false). "
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index b84aaa0bb48a..6a2c76e367a5 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -138,18 +138,6 @@ static void hsw_crt_get_config(struct intel_encoder *encoder,
pipe_config->base.adjusted_mode.flags |= intel_crt_get_flags(encoder);
}
-static void hsw_crt_pre_enable(struct intel_encoder *encoder)
-{
- struct drm_device *dev = encoder->base.dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
-
- WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE, "SPLL already enabled\n");
- I915_WRITE(SPLL_CTL,
- SPLL_PLL_ENABLE | SPLL_PLL_FREQ_1350MHz | SPLL_PLL_SSC);
- POSTING_READ(SPLL_CTL);
- udelay(20);
-}
-
/* Note: The caller is required to filter out dpms modes not supported by the
* platform. */
static void intel_crt_set_dpms(struct intel_encoder *encoder, int mode)
@@ -216,19 +204,6 @@ static void pch_post_disable_crt(struct intel_encoder *encoder)
intel_disable_crt(encoder);
}
-static void hsw_crt_post_disable(struct intel_encoder *encoder)
-{
- struct drm_device *dev = encoder->base.dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
- uint32_t val;
-
- DRM_DEBUG_KMS("Disabling SPLL\n");
- val = I915_READ(SPLL_CTL);
- WARN_ON(!(val & SPLL_PLL_ENABLE));
- I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE);
- POSTING_READ(SPLL_CTL);
-}
-
static void intel_enable_crt(struct intel_encoder *encoder)
{
struct intel_crt *crt = intel_encoder_to_crt(encoder);
@@ -280,6 +255,10 @@ static bool intel_crt_compute_config(struct intel_encoder *encoder,
if (HAS_DDI(dev)) {
pipe_config->ddi_pll_sel = PORT_CLK_SEL_SPLL;
pipe_config->port_clock = 135000 * 2;
+
+ pipe_config->dpll_hw_state.wrpll = 0;
+ pipe_config->dpll_hw_state.spll =
+ SPLL_PLL_ENABLE | SPLL_PLL_FREQ_1350MHz | SPLL_PLL_SSC;
}
return true;
@@ -860,8 +839,6 @@ void intel_crt_init(struct drm_device *dev)
if (HAS_DDI(dev)) {
crt->base.get_config = hsw_crt_get_config;
crt->base.get_hw_state = intel_ddi_get_hw_state;
- crt->base.pre_enable = hsw_crt_pre_enable;
- crt->base.post_disable = hsw_crt_post_disable;
} else {
crt->base.get_config = intel_crt_get_config;
crt->base.get_hw_state = intel_crt_get_hw_state;
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index b25e99a432fb..a6752a61d99f 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -1286,6 +1286,18 @@ hsw_ddi_pll_select(struct intel_crtc *intel_crtc,
}
crtc_state->ddi_pll_sel = PORT_CLK_SEL_WRPLL(pll->id);
+ } else if (crtc_state->ddi_pll_sel == PORT_CLK_SEL_SPLL) {
+ struct drm_atomic_state *state = crtc_state->base.state;
+ struct intel_shared_dpll_config *spll =
+ &intel_atomic_get_shared_dpll_state(state)[DPLL_ID_SPLL];
+
+ if (spll->crtc_mask &&
+ WARN_ON(spll->hw_state.spll != crtc_state->dpll_hw_state.spll))
+ return false;
+
+ crtc_state->shared_dpll = DPLL_ID_SPLL;
+ spll->hw_state.spll = crtc_state->dpll_hw_state.spll;
+ spll->crtc_mask |= 1 << intel_crtc->pipe;
}
return true;
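The crtc_mask bookkeeping above lets several CRTCs share the single SPLL as long as they agree on its state; a small illustration with assumed pipe numbers:

    /* pipe A and pipe B both pick SPLL with identical hw_state.spll */
    spll->crtc_mask |= 1 << 0;	/* pipe A -> mask 0b01 */
    spll->crtc_mask |= 1 << 1;	/* pipe B -> mask 0b11 */
    /* a CRTC requesting a different spll value trips the WARN_ON above */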
@@ -2437,7 +2449,7 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder)
}
}
-static void hsw_ddi_pll_enable(struct drm_i915_private *dev_priv,
+static void hsw_ddi_wrpll_enable(struct drm_i915_private *dev_priv,
struct intel_shared_dpll *pll)
{
I915_WRITE(WRPLL_CTL(pll->id), pll->config.hw_state.wrpll);
@@ -2445,9 +2457,17 @@ static void hsw_ddi_pll_enable(struct drm_i915_private *dev_priv,
udelay(20);
}
-static void hsw_ddi_pll_disable(struct drm_i915_private *dev_priv,
+static void hsw_ddi_spll_enable(struct drm_i915_private *dev_priv,
struct intel_shared_dpll *pll)
{
+ I915_WRITE(SPLL_CTL, pll->config.hw_state.spll);
+ POSTING_READ(SPLL_CTL);
+ udelay(20);
+}
+
+static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv,
+ struct intel_shared_dpll *pll)
+{
uint32_t val;
val = I915_READ(WRPLL_CTL(pll->id));
@@ -2455,9 +2475,19 @@ static void hsw_ddi_pll_disable(struct drm_i915_private *dev_priv,
POSTING_READ(WRPLL_CTL(pll->id));
}
-static bool hsw_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
- struct intel_shared_dpll *pll,
- struct intel_dpll_hw_state *hw_state)
+static void hsw_ddi_spll_disable(struct drm_i915_private *dev_priv,
+ struct intel_shared_dpll *pll)
+{
+ uint32_t val;
+
+ val = I915_READ(SPLL_CTL);
+ I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE);
+ POSTING_READ(SPLL_CTL);
+}
+
+static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv,
+ struct intel_shared_dpll *pll,
+ struct intel_dpll_hw_state *hw_state)
{
uint32_t val;
@@ -2470,25 +2500,50 @@ static bool hsw_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
return val & WRPLL_PLL_ENABLE;
}
+static bool hsw_ddi_spll_get_hw_state(struct drm_i915_private *dev_priv,
+ struct intel_shared_dpll *pll,
+ struct intel_dpll_hw_state *hw_state)
+{
+ uint32_t val;
+
+ if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+ return false;
+
+ val = I915_READ(SPLL_CTL);
+ hw_state->spll = val;
+
+ return val & SPLL_PLL_ENABLE;
+}
+
+
static const char * const hsw_ddi_pll_names[] = {
"WRPLL 1",
"WRPLL 2",
+ "SPLL"
};
static void hsw_shared_dplls_init(struct drm_i915_private *dev_priv)
{
int i;
- dev_priv->num_shared_dpll = 2;
+ dev_priv->num_shared_dpll = 3;
- for (i = 0; i < dev_priv->num_shared_dpll; i++) {
+ for (i = 0; i < 2; i++) {
dev_priv->shared_dplls[i].id = i;
dev_priv->shared_dplls[i].name = hsw_ddi_pll_names[i];
- dev_priv->shared_dplls[i].disable = hsw_ddi_pll_disable;
- dev_priv->shared_dplls[i].enable = hsw_ddi_pll_enable;
+ dev_priv->shared_dplls[i].disable = hsw_ddi_wrpll_disable;
+ dev_priv->shared_dplls[i].enable = hsw_ddi_wrpll_enable;
dev_priv->shared_dplls[i].get_hw_state =
- hsw_ddi_pll_get_hw_state;
+ hsw_ddi_wrpll_get_hw_state;
}
+
+ /* SPLL is special, but needs to be initialized anyway. */
+ dev_priv->shared_dplls[i].id = i;
+ dev_priv->shared_dplls[i].name = hsw_ddi_pll_names[i];
+ dev_priv->shared_dplls[i].disable = hsw_ddi_spll_disable;
+ dev_priv->shared_dplls[i].enable = hsw_ddi_spll_enable;
+ dev_priv->shared_dplls[i].get_hw_state = hsw_ddi_spll_get_hw_state;
+
}
static const char * const skl_ddi_pll_names[] = {
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f62ffc04c21d..32cf97346978 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -116,6 +116,7 @@ static void skylake_pfit_enable(struct intel_crtc *crtc);
static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force);
static void ironlake_pfit_enable(struct intel_crtc *crtc);
static void intel_modeset_setup_hw_state(struct drm_device *dev);
+static void intel_pre_disable_primary(struct drm_crtc *crtc);
typedef struct {
int min, max;
@@ -2607,6 +2608,8 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
struct drm_i915_gem_object *obj;
struct drm_plane *primary = intel_crtc->base.primary;
struct drm_plane_state *plane_state = primary->state;
+ struct drm_crtc_state *crtc_state = intel_crtc->base.state;
+ struct intel_plane *intel_plane = to_intel_plane(primary);
struct drm_framebuffer *fb;
if (!plane_config->fb)
@@ -2643,14 +2646,28 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
}
}
+ /*
+ * We've failed to reconstruct the BIOS FB. Current display state
+ * indicates that the primary plane is visible, but has a NULL FB,
+ * which will lead to problems later if we don't fix it up. The
+ * simplest solution is to just disable the primary plane now and
+ * pretend the BIOS never had it enabled.
+ */
+ to_intel_plane_state(plane_state)->visible = false;
+ crtc_state->plane_mask &= ~(1 << drm_plane_index(primary));
+ intel_pre_disable_primary(&intel_crtc->base);
+ intel_plane->disable_plane(primary, &intel_crtc->base);
+
return;
valid_fb:
- plane_state->src_x = plane_state->src_y = 0;
+ plane_state->src_x = 0;
+ plane_state->src_y = 0;
plane_state->src_w = fb->width << 16;
plane_state->src_h = fb->height << 16;
- plane_state->crtc_x = plane_state->src_y = 0;
+ plane_state->crtc_x = 0;
+ plane_state->crtc_y = 0;
plane_state->crtc_w = fb->width;
plane_state->crtc_h = fb->height;
@@ -4237,6 +4254,7 @@ struct intel_shared_dpll *intel_get_shared_dpll(struct intel_crtc *crtc,
struct intel_shared_dpll *pll;
struct intel_shared_dpll_config *shared_dpll;
enum intel_dpll_id i;
+ int max = dev_priv->num_shared_dpll;
shared_dpll = intel_atomic_get_shared_dpll_state(crtc_state->base.state);
@@ -4271,9 +4289,11 @@ struct intel_shared_dpll *intel_get_shared_dpll(struct intel_crtc *crtc,
WARN_ON(shared_dpll[i].crtc_mask);
goto found;
- }
+ } else if (INTEL_INFO(dev_priv)->gen < 9 && HAS_DDI(dev_priv))
+ /* Do not consider SPLL */
+ max = 2;
- for (i = 0; i < dev_priv->num_shared_dpll; i++) {
+ for (i = 0; i < max; i++) {
pll = &dev_priv->shared_dplls[i];
/* Only want to check enabled timings first */
@@ -5189,11 +5209,31 @@ static enum intel_display_power_domain port_to_power_domain(enum port port)
case PORT_E:
return POWER_DOMAIN_PORT_DDI_E_2_LANES;
default:
- WARN_ON_ONCE(1);
+ MISSING_CASE(port);
return POWER_DOMAIN_PORT_OTHER;
}
}
+static enum intel_display_power_domain port_to_aux_power_domain(enum port port)
+{
+ switch (port) {
+ case PORT_A:
+ return POWER_DOMAIN_AUX_A;
+ case PORT_B:
+ return POWER_DOMAIN_AUX_B;
+ case PORT_C:
+ return POWER_DOMAIN_AUX_C;
+ case PORT_D:
+ return POWER_DOMAIN_AUX_D;
+ case PORT_E:
+ /* FIXME: Check VBT for actual wiring of PORT E */
+ return POWER_DOMAIN_AUX_D;
+ default:
+ MISSING_CASE(port);
+ return POWER_DOMAIN_AUX_A;
+ }
+}
+
#define for_each_power_domain(domain, mask) \
for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \
if ((1 << (domain)) & (mask))
@@ -5225,6 +5265,36 @@ intel_display_port_power_domain(struct intel_encoder *intel_encoder)
}
}
+enum intel_display_power_domain
+intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder)
+{
+ struct drm_device *dev = intel_encoder->base.dev;
+ struct intel_digital_port *intel_dig_port;
+
+ switch (intel_encoder->type) {
+ case INTEL_OUTPUT_UNKNOWN:
+ case INTEL_OUTPUT_HDMI:
+ /*
+ * Only DDI platforms should ever use these output types.
+ * We can get here after the HDMI detect code has already set
+ * the type of the shared encoder. Since we can't be sure what
+ * the status of the given connectors is, play it safe and run
+ * the DP detection too.
+ */
+ WARN_ON_ONCE(!HAS_DDI(dev));
+ case INTEL_OUTPUT_DISPLAYPORT:
+ case INTEL_OUTPUT_EDP:
+ intel_dig_port = enc_to_dig_port(&intel_encoder->base);
+ return port_to_aux_power_domain(intel_dig_port->port);
+ case INTEL_OUTPUT_DP_MST:
+ intel_dig_port = enc_to_mst(&intel_encoder->base)->primary;
+ return port_to_aux_power_domain(intel_dig_port->port);
+ default:
+ MISSING_CASE(intel_encoder->type);
+ return POWER_DOMAIN_AUX_A;
+ }
+}
+
static unsigned long get_crtc_power_domains(struct drm_crtc *crtc)
{
struct drm_device *dev = crtc->dev;
@@ -6254,9 +6324,11 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
if (to_intel_plane_state(crtc->primary->state)->visible) {
intel_crtc_wait_for_pending_flips(crtc);
intel_pre_disable_primary(crtc);
+
+ intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary));
+ to_intel_plane_state(crtc->primary->state)->visible = false;
}
- intel_crtc_disable_planes(crtc, crtc->state->plane_mask);
dev_priv->display.crtc_disable(crtc);
intel_crtc->active = false;
intel_update_watermarks(crtc);
@@ -9723,6 +9795,8 @@ static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv,
case PORT_CLK_SEL_WRPLL2:
pipe_config->shared_dpll = DPLL_ID_WRPLL2;
break;
+ case PORT_CLK_SEL_SPLL:
+ pipe_config->shared_dpll = DPLL_ID_SPLL;
}
}
@@ -9851,14 +9925,14 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
return true;
}
-static void i845_update_cursor(struct drm_crtc *crtc, u32 base)
+static void i845_update_cursor(struct drm_crtc *crtc, u32 base, bool on)
{
struct drm_device *dev = crtc->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
uint32_t cntl = 0, size = 0;
- if (base) {
+ if (on) {
unsigned int width = intel_crtc->base.cursor->state->crtc_w;
unsigned int height = intel_crtc->base.cursor->state->crtc_h;
unsigned int stride = roundup_pow_of_two(width) * 4;
@@ -9913,16 +9987,15 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base)
}
}
-static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base)
+static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, bool on)
{
struct drm_device *dev = crtc->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
int pipe = intel_crtc->pipe;
- uint32_t cntl;
+ uint32_t cntl = 0;
- cntl = 0;
- if (base) {
+ if (on) {
cntl = MCURSOR_GAMMA_ENABLE;
switch (intel_crtc->base.cursor->state->crtc_w) {
case 64:
@@ -9973,18 +10046,17 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc,
int y = cursor_state->crtc_y;
u32 base = 0, pos = 0;
- if (on)
- base = intel_crtc->cursor_addr;
+ base = intel_crtc->cursor_addr;
if (x >= intel_crtc->config->pipe_src_w)
- base = 0;
+ on = false;
if (y >= intel_crtc->config->pipe_src_h)
- base = 0;
+ on = false;
if (x < 0) {
if (x + cursor_state->crtc_w <= 0)
- base = 0;
+ on = false;
pos |= CURSOR_POS_SIGN << CURSOR_X_SHIFT;
x = -x;
@@ -9993,16 +10065,13 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc,
if (y < 0) {
if (y + cursor_state->crtc_h <= 0)
- base = 0;
+ on = false;
pos |= CURSOR_POS_SIGN << CURSOR_Y_SHIFT;
y = -y;
}
pos |= y << CURSOR_Y_SHIFT;
- if (base == 0 && intel_crtc->cursor_base == 0)
- return;
-
I915_WRITE(CURPOS(pipe), pos);
/* ILK+ do this automagically */
@@ -10013,9 +10082,9 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc,
}
if (IS_845G(dev) || IS_I865G(dev))
- i845_update_cursor(crtc, base);
+ i845_update_cursor(crtc, base, on);
else
- i9xx_update_cursor(crtc, base);
+ i9xx_update_cursor(crtc, base, on);
}
static bool cursor_size_ok(struct drm_device *dev,
@@ -12003,9 +12072,10 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc,
pipe_config->dpll_hw_state.cfgcr1,
pipe_config->dpll_hw_state.cfgcr2);
} else if (HAS_DDI(dev)) {
- DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: wrpll: 0x%x\n",
+ DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: wrpll: 0x%x spll: 0x%x\n",
pipe_config->ddi_pll_sel,
- pipe_config->dpll_hw_state.wrpll);
+ pipe_config->dpll_hw_state.wrpll,
+ pipe_config->dpll_hw_state.spll);
} else {
DRM_DEBUG_KMS("dpll_hw_state: dpll: 0x%x, dpll_md: 0x%x, "
"fp0: 0x%x, fp1: 0x%x\n",
@@ -12053,18 +12123,22 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc,
static bool check_digital_port_conflicts(struct drm_atomic_state *state)
{
struct drm_device *dev = state->dev;
- struct intel_encoder *encoder;
struct drm_connector *connector;
- struct drm_connector_state *connector_state;
unsigned int used_ports = 0;
- int i;
/*
* Walk the connector list instead of the encoder
* list to detect the problem on ddi platforms
* where there's just one encoder per digital port.
*/
- for_each_connector_in_state(state, connector, connector_state, i) {
+ drm_for_each_connector(connector, dev) {
+ struct drm_connector_state *connector_state;
+ struct intel_encoder *encoder;
+
+ connector_state = drm_atomic_get_existing_connector_state(state, connector);
+ if (!connector_state)
+ connector_state = connector->state;
+
if (!connector_state->best_encoder)
continue;
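The fallback above relies on drm_atomic_get_existing_connector_state() returning NULL for connectors the atomic state does not touch; its semantics are roughly:

    static inline struct drm_connector_state *
    drm_atomic_get_existing_connector_state(struct drm_atomic_state *state,
                                            struct drm_connector *connector)
    {
            int index = drm_connector_index(connector);

            if (index >= state->num_connector)
                    return NULL;		/* not part of this update */

            return state->connector_states[index];
    }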
@@ -12452,7 +12526,6 @@ intel_pipe_config_compare(struct drm_device *dev,
if (INTEL_INFO(dev)->gen < 8) {
PIPE_CONF_CHECK_M_N(dp_m_n);
- PIPE_CONF_CHECK_I(has_drrs);
if (current_config->has_drrs)
PIPE_CONF_CHECK_M_N(dp_m2_n2);
} else
@@ -12528,6 +12601,7 @@ intel_pipe_config_compare(struct drm_device *dev,
PIPE_CONF_CHECK_X(dpll_hw_state.fp0);
PIPE_CONF_CHECK_X(dpll_hw_state.fp1);
PIPE_CONF_CHECK_X(dpll_hw_state.wrpll);
+ PIPE_CONF_CHECK_X(dpll_hw_state.spll);
PIPE_CONF_CHECK_X(dpll_hw_state.ctrl1);
PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr1);
PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr2);
@@ -13032,6 +13106,9 @@ static int intel_atomic_check(struct drm_device *dev,
struct intel_crtc_state *pipe_config =
to_intel_crtc_state(crtc_state);
+ memset(&to_intel_crtc(crtc)->atomic, 0,
+ sizeof(struct intel_crtc_atomic_commit));
+
/* Catch I915_MODE_FLAG_INHERITED */
if (crtc_state->mode.private_flags != crtc->state->mode.private_flags)
crtc_state->mode_changed = true;
@@ -13056,7 +13133,8 @@ static int intel_atomic_check(struct drm_device *dev,
if (ret)
return ret;
- if (intel_pipe_config_compare(state->dev,
+ if (i915.fastboot &&
+ intel_pipe_config_compare(state->dev,
to_intel_crtc_state(crtc->state),
pipe_config, true)) {
crtc_state->mode_changed = false;
@@ -13654,6 +13732,7 @@ intel_check_cursor_plane(struct drm_plane *plane,
struct drm_crtc *crtc = crtc_state->base.crtc;
struct drm_framebuffer *fb = state->base.fb;
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+ enum pipe pipe = to_intel_plane(plane)->pipe;
unsigned stride;
int ret;
@@ -13687,6 +13766,22 @@ intel_check_cursor_plane(struct drm_plane *plane,
return -EINVAL;
}
+ /*
+ * There's something wrong with the cursor on CHV pipe C.
+ * If it straddles the left edge of the screen then
+ * moving it away from the edge or disabling it often
+ * results in a pipe underrun, and often that can lead to a
+ * dead pipe (constant underrun reported, and it scans
+ * out just a solid color). To recover from that, the
+ * display power well must be turned off and on again.
+ * Refuse to put the cursor into that compromised position.
+ */
+ if (IS_CHERRYVIEW(plane->dev) && pipe == PIPE_C &&
+ state->visible && state->base.crtc_x < 0) {
+ DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n");
+ return -EINVAL;
+ }
+
return 0;
}
@@ -13710,9 +13805,6 @@ intel_commit_cursor_plane(struct drm_plane *plane,
crtc = crtc ? crtc : plane->crtc;
intel_crtc = to_intel_crtc(crtc);
- if (intel_crtc->cursor_bo == obj)
- goto update;
-
if (!obj)
addr = 0;
else if (!INTEL_INFO(dev)->cursor_needs_physical)
@@ -13721,9 +13813,7 @@ intel_commit_cursor_plane(struct drm_plane *plane,
addr = obj->phys_handle->busaddr;
intel_crtc->cursor_addr = addr;
- intel_crtc->cursor_bo = obj;
-update:
if (crtc->state->active)
intel_crtc_update_cursor(crtc, state->visible);
}
@@ -14364,16 +14454,17 @@ static int intel_framebuffer_init(struct drm_device *dev,
static struct drm_framebuffer *
intel_user_framebuffer_create(struct drm_device *dev,
struct drm_file *filp,
- struct drm_mode_fb_cmd2 *mode_cmd)
+ struct drm_mode_fb_cmd2 *user_mode_cmd)
{
struct drm_i915_gem_object *obj;
+ struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd;
obj = to_intel_bo(drm_gem_object_lookup(dev, filp,
- mode_cmd->handles[0]));
+ mode_cmd.handles[0]));
if (&obj->base == NULL)
return ERR_PTR(-ENOENT);
- return intel_framebuffer_create(dev, mode_cmd, obj);
+ return intel_framebuffer_create(dev, &mode_cmd, obj);
}
#ifndef CONFIG_DRM_FBDEV_EMULATION
@@ -14705,6 +14796,9 @@ static struct intel_quirk intel_quirks[] = {
/* Apple Macbook 2,1 (Core 2 T7400) */
{ 0x27a2, 0x8086, 0x7270, quirk_backlight_present },
+ /* Apple Macbook 4,1 */
+ { 0x2a02, 0x106b, 0x00a1, quirk_backlight_present },
+
/* Toshiba CB35 Chromebook (Celeron 2955U) */
{ 0x0a06, 0x1179, 0x0a88, quirk_backlight_present },
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 09bdd94ca3ba..78b8ec84d576 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -277,7 +277,7 @@ static void pps_lock(struct intel_dp *intel_dp)
* See vlv_power_sequencer_reset() why we need
* a power domain reference here.
*/
- power_domain = intel_display_port_power_domain(encoder);
+ power_domain = intel_display_port_aux_power_domain(encoder);
intel_display_power_get(dev_priv, power_domain);
mutex_lock(&dev_priv->pps_mutex);
@@ -293,7 +293,7 @@ static void pps_unlock(struct intel_dp *intel_dp)
mutex_unlock(&dev_priv->pps_mutex);
- power_domain = intel_display_port_power_domain(encoder);
+ power_domain = intel_display_port_aux_power_domain(encoder);
intel_display_power_put(dev_priv, power_domain);
}
@@ -816,8 +816,6 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
intel_dp_check_edp(intel_dp);
- intel_aux_display_runtime_get(dev_priv);
-
/* Try to wait for any previous AUX channel activity */
for (try = 0; try < 3; try++) {
status = I915_READ_NOTRACE(ch_ctl);
@@ -926,7 +924,6 @@ done:
ret = recv_bytes;
out:
pm_qos_update_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE);
- intel_aux_display_runtime_put(dev_priv);
if (vdd)
edp_panel_vdd_off(intel_dp, false);
@@ -1784,7 +1781,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp)
if (edp_have_panel_vdd(intel_dp))
return need_to_disable;
- power_domain = intel_display_port_power_domain(intel_encoder);
+ power_domain = intel_display_port_aux_power_domain(intel_encoder);
intel_display_power_get(dev_priv, power_domain);
DRM_DEBUG_KMS("Turning eDP port %c VDD on\n",
@@ -1874,7 +1871,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
if ((pp & POWER_TARGET_ON) == 0)
intel_dp->last_power_cycle = jiffies;
- power_domain = intel_display_port_power_domain(intel_encoder);
+ power_domain = intel_display_port_aux_power_domain(intel_encoder);
intel_display_power_put(dev_priv, power_domain);
}
@@ -2025,7 +2022,7 @@ static void edp_panel_off(struct intel_dp *intel_dp)
wait_panel_off(intel_dp);
/* We got a reference when we enabled the VDD. */
- power_domain = intel_display_port_power_domain(intel_encoder);
+ power_domain = intel_display_port_aux_power_domain(intel_encoder);
intel_display_power_put(dev_priv, power_domain);
}
@@ -4765,26 +4762,6 @@ intel_dp_unset_edid(struct intel_dp *intel_dp)
intel_dp->has_audio = false;
}
-static enum intel_display_power_domain
-intel_dp_power_get(struct intel_dp *dp)
-{
- struct intel_encoder *encoder = &dp_to_dig_port(dp)->base;
- enum intel_display_power_domain power_domain;
-
- power_domain = intel_display_port_power_domain(encoder);
- intel_display_power_get(to_i915(encoder->base.dev), power_domain);
-
- return power_domain;
-}
-
-static void
-intel_dp_power_put(struct intel_dp *dp,
- enum intel_display_power_domain power_domain)
-{
- struct intel_encoder *encoder = &dp_to_dig_port(dp)->base;
- intel_display_power_put(to_i915(encoder->base.dev), power_domain);
-}
-
static enum drm_connector_status
intel_dp_detect(struct drm_connector *connector, bool force)
{
@@ -4808,7 +4785,8 @@ intel_dp_detect(struct drm_connector *connector, bool force)
return connector_status_disconnected;
}
- power_domain = intel_dp_power_get(intel_dp);
+ power_domain = intel_display_port_aux_power_domain(intel_encoder);
+ intel_display_power_get(to_i915(dev), power_domain);
/* Can't disconnect eDP, but you can close the lid... */
if (is_edp(intel_dp))
@@ -4853,7 +4831,7 @@ intel_dp_detect(struct drm_connector *connector, bool force)
}
out:
- intel_dp_power_put(intel_dp, power_domain);
+ intel_display_power_put(to_i915(dev), power_domain);
return status;
}
@@ -4862,6 +4840,7 @@ intel_dp_force(struct drm_connector *connector)
{
struct intel_dp *intel_dp = intel_attached_dp(connector);
struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base;
+ struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev);
enum intel_display_power_domain power_domain;
DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
@@ -4871,11 +4850,12 @@ intel_dp_force(struct drm_connector *connector)
if (connector->status != connector_status_connected)
return;
- power_domain = intel_dp_power_get(intel_dp);
+ power_domain = intel_display_port_aux_power_domain(intel_encoder);
+ intel_display_power_get(dev_priv, power_domain);
intel_dp_set_edid(intel_dp);
- intel_dp_power_put(intel_dp, power_domain);
+ intel_display_power_put(dev_priv, power_domain);
if (intel_encoder->type != INTEL_OUTPUT_EDP)
intel_encoder->type = INTEL_OUTPUT_DISPLAYPORT;
@@ -5091,7 +5071,7 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp)
* indefinitely.
*/
DRM_DEBUG_KMS("VDD left on by BIOS, adjusting state tracking\n");
- power_domain = intel_display_port_power_domain(&intel_dig_port->base);
+ power_domain = intel_display_port_aux_power_domain(&intel_dig_port->base);
intel_display_power_get(dev_priv, power_domain);
edp_panel_vdd_schedule_off(intel_dp);
@@ -5153,7 +5133,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
enum intel_display_power_domain power_domain;
enum irqreturn ret = IRQ_NONE;
- if (intel_dig_port->base.type != INTEL_OUTPUT_EDP)
+ if (intel_dig_port->base.type != INTEL_OUTPUT_EDP &&
+ intel_dig_port->base.type != INTEL_OUTPUT_HDMI)
intel_dig_port->base.type = INTEL_OUTPUT_DISPLAYPORT;
if (long_hpd && intel_dig_port->base.type == INTEL_OUTPUT_EDP) {
@@ -5172,7 +5153,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
port_name(intel_dig_port->port),
long_hpd ? "long" : "short");
- power_domain = intel_display_port_power_domain(intel_encoder);
+ power_domain = intel_display_port_aux_power_domain(intel_encoder);
intel_display_power_get(dev_priv, power_domain);
if (long_hpd) {
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 0598932ce623..0d00f07b7163 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -550,7 +550,6 @@ struct intel_crtc {
int adjusted_x;
int adjusted_y;
- struct drm_i915_gem_object *cursor_bo;
uint32_t cursor_addr;
uint32_t cursor_cntl;
uint32_t cursor_size;
@@ -1169,6 +1168,8 @@ void hsw_enable_ips(struct intel_crtc *crtc);
void hsw_disable_ips(struct intel_crtc *crtc);
enum intel_display_power_domain
intel_display_port_power_domain(struct intel_encoder *intel_encoder);
+enum intel_display_power_domain
+intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder);
void intel_mode_from_pipe_config(struct drm_display_mode *mode,
struct intel_crtc_state *pipe_config);
void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc);
@@ -1377,8 +1378,6 @@ void intel_display_power_get(struct drm_i915_private *dev_priv,
enum intel_display_power_domain domain);
void intel_display_power_put(struct drm_i915_private *dev_priv,
enum intel_display_power_domain domain);
-void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv);
-void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv);
void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 9eafa191cee2..e6c035b0fc1c 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1335,21 +1335,17 @@ intel_hdmi_set_edid(struct drm_connector *connector, bool force)
{
struct drm_i915_private *dev_priv = to_i915(connector->dev);
struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
- struct intel_encoder *intel_encoder =
- &hdmi_to_dig_port(intel_hdmi)->base;
- enum intel_display_power_domain power_domain;
struct edid *edid = NULL;
bool connected = false;
- power_domain = intel_display_port_power_domain(intel_encoder);
- intel_display_power_get(dev_priv, power_domain);
+ intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
if (force)
edid = drm_get_edid(connector,
intel_gmbus_get_adapter(dev_priv,
intel_hdmi->ddc_bus));
- intel_display_power_put(dev_priv, power_domain);
+ intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
to_intel_connector(connector)->detect_edid = edid;
if (edid && edid->input & DRM_EDID_INPUT_DIGITAL) {
@@ -1378,15 +1374,18 @@ intel_hdmi_detect(struct drm_connector *connector, bool force)
struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
struct drm_i915_private *dev_priv = to_i915(connector->dev);
bool live_status = false;
- unsigned int retry = 3;
+ unsigned int try;
DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
connector->base.id, connector->name);
- while (!live_status && --retry) {
+ intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
+
+ for (try = 0; !live_status && try < 9; try++) {
+ if (try)
+ msleep(10);
live_status = intel_digital_port_connected(dev_priv,
hdmi_to_dig_port(intel_hdmi));
- mdelay(10);
}
if (!live_status)
@@ -1402,6 +1401,8 @@ intel_hdmi_detect(struct drm_connector *connector, bool force)
} else
status = connector_status_disconnected;
+ intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+
return status;
}
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index 1369fc41d039..8324654037b6 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -483,7 +483,7 @@ gmbus_xfer(struct i2c_adapter *adapter,
int i = 0, inc, try = 0;
int ret = 0;
- intel_aux_display_runtime_get(dev_priv);
+ intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
mutex_lock(&dev_priv->gmbus_mutex);
if (bus->force_bit) {
@@ -595,7 +595,9 @@ timeout:
out:
mutex_unlock(&dev_priv->gmbus_mutex);
- intel_aux_display_runtime_put(dev_priv);
+
+ intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+
return ret;
}
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index d52a15df6917..f091ad12d694 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4449,7 +4449,7 @@ static void gen6_set_rps(struct drm_device *dev, u8 val)
POSTING_READ(GEN6_RPNSWREQ);
dev_priv->rps.cur_freq = val;
- trace_intel_gpu_freq_change(val * 50);
+ trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
}
static void valleyview_set_rps(struct drm_device *dev, u8 val)
@@ -4782,8 +4782,7 @@ static void gen9_enable_rc6(struct drm_device *dev)
/* 2b: Program RC6 thresholds.*/
/* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */
- if (IS_SKYLAKE(dev) && !((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) &&
- (INTEL_REVID(dev) <= SKL_REVID_E0)))
+ if (IS_SKYLAKE(dev))
I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
else
I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
@@ -4825,7 +4824,7 @@ static void gen9_enable_rc6(struct drm_device *dev)
* WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6.
*/
if ((IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) ||
- ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) && (INTEL_REVID(dev) <= SKL_REVID_E0)))
+ ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) && (INTEL_REVID(dev) <= SKL_REVID_F0)))
I915_WRITE(GEN9_PG_ENABLE, 0);
else
I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
@@ -7255,7 +7254,8 @@ static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
{
if (IS_GEN9(dev_priv->dev))
- return (val * GT_FREQUENCY_MULTIPLIER) / GEN9_FREQ_SCALER;
+ return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
+ GEN9_FREQ_SCALER);
else if (IS_CHERRYVIEW(dev_priv->dev))
return chv_gpu_freq(dev_priv, val);
else if (IS_VALLEYVIEW(dev_priv->dev))
@@ -7267,13 +7267,14 @@ int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
{
if (IS_GEN9(dev_priv->dev))
- return (val * GEN9_FREQ_SCALER) / GT_FREQUENCY_MULTIPLIER;
+ return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
+ GT_FREQUENCY_MULTIPLIER);
else if (IS_CHERRYVIEW(dev_priv->dev))
return chv_freq_opcode(dev_priv, val);
else if (IS_VALLEYVIEW(dev_priv->dev))
return byt_freq_opcode(dev_priv, val);
else
- return val / GT_FREQUENCY_MULTIPLIER;
+ return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
}
struct request_boost {
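A worked example of why the rounding matters on gen9, assuming the usual constants (GT_FREQUENCY_MULTIPLIER = 50, GEN9_FREQ_SCALER = 3, i.e. 16.67 MHz units):

    /* opcode 7 -> 7 * 50 / 3 = 116.67 MHz */
    int mhz_trunc = (7 * 50) / 3;			/* 116: always rounds down */
    int mhz_round = DIV_ROUND_CLOSEST(7 * 50, 3);	/* 117: nearest value */
    /* round-tripping: DIV_ROUND_CLOSEST(117 * 3, 50) == 7 again */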
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index d89c1d0aa1b7..7e23d65c9b24 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -362,6 +362,7 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv,
BIT(POWER_DOMAIN_AUX_C) | \
BIT(POWER_DOMAIN_AUDIO) | \
BIT(POWER_DOMAIN_VGA) | \
+ BIT(POWER_DOMAIN_GMBUS) | \
BIT(POWER_DOMAIN_INIT))
#define BXT_DISPLAY_POWERWELL_1_POWER_DOMAINS ( \
BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS | \
@@ -1483,6 +1484,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
BIT(POWER_DOMAIN_AUX_B) | \
BIT(POWER_DOMAIN_AUX_C) | \
BIT(POWER_DOMAIN_AUX_D) | \
+ BIT(POWER_DOMAIN_GMBUS) | \
BIT(POWER_DOMAIN_INIT))
#define HSW_DISPLAY_POWER_DOMAINS ( \
(POWER_DOMAIN_MASK & ~HSW_ALWAYS_ON_POWER_DOMAINS) | \
@@ -1845,6 +1847,8 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv)
i915.disable_power_well = sanitize_disable_power_well_option(dev_priv,
i915.disable_power_well);
+ BUILD_BUG_ON(POWER_DOMAIN_NUM > 31);
+
mutex_init(&power_domains->lock);
/*
@@ -2064,36 +2068,6 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv)
}
/**
- * intel_aux_display_runtime_get - grab an auxiliary power domain reference
- * @dev_priv: i915 device instance
- *
- * This function grabs a power domain reference for the auxiliary power domain
- * (for access to the GMBUS and DP AUX blocks) and ensures that it and all its
- * parents are powered up. Therefore users should only grab a reference to the
- * innermost power domain they need.
- *
- * Any power domain reference obtained by this function must have a symmetric
- * call to intel_aux_display_runtime_put() to release the reference again.
- */
-void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv)
-{
- intel_runtime_pm_get(dev_priv);
-}
-
-/**
- * intel_aux_display_runtime_put - release an auxiliary power domain reference
- * @dev_priv: i915 device instance
- *
- * This function drops the auxiliary power domain reference obtained by
- * intel_aux_display_runtime_get() and might power down the corresponding
- * hardware block right away if this is the last reference.
- */
-void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv)
-{
- intel_runtime_pm_put(dev_priv);
-}
-
-/**
* intel_runtime_pm_get - grab a runtime pm reference
* @dev_priv: i915 device instance
*
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 64f16ea779ef..7b990b4e96d2 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -63,8 +63,7 @@ static void imx_drm_driver_lastclose(struct drm_device *drm)
#if IS_ENABLED(CONFIG_DRM_IMX_FB_HELPER)
struct imx_drm_device *imxdrm = drm->dev_private;
- if (imxdrm->fbhelper)
- drm_fbdev_cma_restore_mode(imxdrm->fbhelper);
+ drm_fbdev_cma_restore_mode(imxdrm->fbhelper);
#endif
}
@@ -340,7 +339,7 @@ err_kms:
* imx_drm_add_crtc - add a new crtc
*/
int imx_drm_add_crtc(struct drm_device *drm, struct drm_crtc *crtc,
- struct imx_drm_crtc **new_crtc,
+ struct imx_drm_crtc **new_crtc, struct drm_plane *primary_plane,
const struct imx_drm_crtc_helper_funcs *imx_drm_helper_funcs,
struct device_node *port)
{
@@ -379,7 +378,7 @@ int imx_drm_add_crtc(struct drm_device *drm, struct drm_crtc *crtc,
drm_crtc_helper_add(crtc,
imx_drm_crtc->imx_drm_helper_funcs.crtc_helper_funcs);
- drm_crtc_init(drm, crtc,
+ drm_crtc_init_with_planes(drm, crtc, primary_plane, NULL,
imx_drm_crtc->imx_drm_helper_funcs.crtc_funcs);
return 0;
diff --git a/drivers/gpu/drm/imx/imx-drm.h b/drivers/gpu/drm/imx/imx-drm.h
index 28e776d8d9d2..83284b4d4be1 100644
--- a/drivers/gpu/drm/imx/imx-drm.h
+++ b/drivers/gpu/drm/imx/imx-drm.h
@@ -9,6 +9,7 @@ struct drm_display_mode;
struct drm_encoder;
struct drm_fbdev_cma;
struct drm_framebuffer;
+struct drm_plane;
struct imx_drm_crtc;
struct platform_device;
@@ -24,7 +25,7 @@ struct imx_drm_crtc_helper_funcs {
};
int imx_drm_add_crtc(struct drm_device *drm, struct drm_crtc *crtc,
- struct imx_drm_crtc **new_crtc,
+ struct imx_drm_crtc **new_crtc, struct drm_plane *primary_plane,
const struct imx_drm_crtc_helper_funcs *imx_helper_funcs,
struct device_node *port);
int imx_drm_remove_crtc(struct imx_drm_crtc *);
diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c
index e671ad369416..f9597146dc67 100644
--- a/drivers/gpu/drm/imx/imx-tve.c
+++ b/drivers/gpu/drm/imx/imx-tve.c
@@ -721,6 +721,7 @@ static const struct of_device_id imx_tve_dt_ids[] = {
{ .compatible = "fsl,imx53-tve", },
{ /* sentinel */ }
};
+MODULE_DEVICE_TABLE(of, imx_tve_dt_ids);
static struct platform_driver imx_tve_driver = {
.probe = imx_tve_probe,
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index 7bc8301fafff..4ab841eebee1 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -212,7 +212,8 @@ static void ipu_crtc_handle_pageflip(struct ipu_crtc *ipu_crtc)
spin_lock_irqsave(&drm->event_lock, flags);
if (ipu_crtc->page_flip_event)
- drm_send_vblank_event(drm, -1, ipu_crtc->page_flip_event);
+ drm_crtc_send_vblank_event(&ipu_crtc->base,
+ ipu_crtc->page_flip_event);
ipu_crtc->page_flip_event = NULL;
imx_drm_crtc_vblank_put(ipu_crtc->imx_crtc);
spin_unlock_irqrestore(&drm->event_lock, flags);
@@ -349,7 +350,6 @@ static int ipu_crtc_init(struct ipu_crtc *ipu_crtc,
struct ipu_soc *ipu = dev_get_drvdata(ipu_crtc->dev->parent);
int dp = -EINVAL;
int ret;
- int id;
ret = ipu_get_resources(ipu_crtc, pdata);
if (ret) {
@@ -358,18 +358,23 @@ static int ipu_crtc_init(struct ipu_crtc *ipu_crtc,
return ret;
}
+ if (pdata->dp >= 0)
+ dp = IPU_DP_FLOW_SYNC_BG;
+ ipu_crtc->plane[0] = ipu_plane_init(drm, ipu, pdata->dma[0], dp, 0,
+ DRM_PLANE_TYPE_PRIMARY);
+ if (IS_ERR(ipu_crtc->plane[0])) {
+ ret = PTR_ERR(ipu_crtc->plane[0]);
+ goto err_put_resources;
+ }
+
ret = imx_drm_add_crtc(drm, &ipu_crtc->base, &ipu_crtc->imx_crtc,
- &ipu_crtc_helper_funcs, ipu_crtc->dev->of_node);
+ &ipu_crtc->plane[0]->base, &ipu_crtc_helper_funcs,
+ ipu_crtc->dev->of_node);
if (ret) {
dev_err(ipu_crtc->dev, "adding crtc failed with %d.\n", ret);
goto err_put_resources;
}
- if (pdata->dp >= 0)
- dp = IPU_DP_FLOW_SYNC_BG;
- id = imx_drm_crtc_id(ipu_crtc->imx_crtc);
- ipu_crtc->plane[0] = ipu_plane_init(ipu_crtc->base.dev, ipu,
- pdata->dma[0], dp, BIT(id), true);
ret = ipu_plane_get_resources(ipu_crtc->plane[0]);
if (ret) {
dev_err(ipu_crtc->dev, "getting plane 0 resources failed with %d.\n",
@@ -379,10 +384,10 @@ static int ipu_crtc_init(struct ipu_crtc *ipu_crtc,
/* If this crtc is using the DP, add an overlay plane */
if (pdata->dp >= 0 && pdata->dma[1] > 0) {
- ipu_crtc->plane[1] = ipu_plane_init(ipu_crtc->base.dev, ipu,
- pdata->dma[1],
- IPU_DP_FLOW_SYNC_FG,
- BIT(id), false);
+ ipu_crtc->plane[1] = ipu_plane_init(drm, ipu, pdata->dma[1],
+ IPU_DP_FLOW_SYNC_FG,
+ drm_crtc_mask(&ipu_crtc->base),
+ DRM_PLANE_TYPE_OVERLAY);
if (IS_ERR(ipu_crtc->plane[1]))
ipu_crtc->plane[1] = NULL;
}
@@ -407,28 +412,6 @@ err_put_resources:
return ret;
}
-static struct device_node *ipu_drm_get_port_by_id(struct device_node *parent,
- int port_id)
-{
- struct device_node *port;
- int id, ret;
-
- port = of_get_child_by_name(parent, "port");
- while (port) {
- ret = of_property_read_u32(port, "reg", &id);
- if (!ret && id == port_id)
- return port;
-
- do {
- port = of_get_next_child(parent, port);
- if (!port)
- return NULL;
- } while (of_node_cmp(port->name, "port"));
- }
-
- return NULL;
-}
-
static int ipu_drm_bind(struct device *dev, struct device *master, void *data)
{
struct ipu_client_platformdata *pdata = dev->platform_data;
@@ -470,23 +453,11 @@ static const struct component_ops ipu_crtc_ops = {
static int ipu_drm_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- struct ipu_client_platformdata *pdata = dev->platform_data;
int ret;
if (!dev->platform_data)
return -EINVAL;
- if (!dev->of_node) {
- /* Associate crtc device with the corresponding DI port node */
- dev->of_node = ipu_drm_get_port_by_id(dev->parent->of_node,
- pdata->di + 2);
- if (!dev->of_node) {
- dev_err(dev, "missing port@%d node in %s\n",
- pdata->di + 2, dev->parent->of_node->full_name);
- return -ENODEV;
- }
- }
-
ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
if (ret)
return ret;
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index 575f4c84388f..e2ff410bab74 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -381,7 +381,7 @@ static struct drm_plane_funcs ipu_plane_funcs = {
struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu,
int dma, int dp, unsigned int possible_crtcs,
- bool priv)
+ enum drm_plane_type type)
{
struct ipu_plane *ipu_plane;
int ret;
@@ -399,10 +399,9 @@ struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu,
ipu_plane->dma = dma;
ipu_plane->dp_flow = dp;
- ret = drm_plane_init(dev, &ipu_plane->base, possible_crtcs,
- &ipu_plane_funcs, ipu_plane_formats,
- ARRAY_SIZE(ipu_plane_formats),
- priv);
+ ret = drm_universal_plane_init(dev, &ipu_plane->base, possible_crtcs,
+ &ipu_plane_funcs, ipu_plane_formats,
+ ARRAY_SIZE(ipu_plane_formats), type);
if (ret) {
DRM_ERROR("failed to initialize plane\n");
kfree(ipu_plane);
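The switch to drm_universal_plane_init() replaces the old bool flag (true created a primary plane, false an overlay) with an explicit type. For reference, the enum as defined in this kernel's drm_crtc.h:

enum drm_plane_type {
        DRM_PLANE_TYPE_OVERLAY, /* what drm_plane_init(..., false) created */
        DRM_PLANE_TYPE_PRIMARY, /* what drm_plane_init(..., true) created */
        DRM_PLANE_TYPE_CURSOR,  /* only reachable via the universal API */
};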
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.h b/drivers/gpu/drm/imx/ipuv3-plane.h
index 9b5eff18f5b8..3a443b413c60 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.h
+++ b/drivers/gpu/drm/imx/ipuv3-plane.h
@@ -34,7 +34,7 @@ struct ipu_plane {
struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu,
int dma, int dp, unsigned int possible_crtcs,
- bool priv);
+ enum drm_plane_type type);
/* Init IDMAC, DMFC, DP */
int ipu_plane_mode_set(struct ipu_plane *plane, struct drm_crtc *crtc,
diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c
index b4deb9cf9d71..2e9b9f1b5cd2 100644
--- a/drivers/gpu/drm/imx/parallel-display.c
+++ b/drivers/gpu/drm/imx/parallel-display.c
@@ -54,7 +54,11 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector)
if (imxpd->panel && imxpd->panel->funcs &&
imxpd->panel->funcs->get_modes) {
+ struct drm_display_info *di = &connector->display_info;
+
num_modes = imxpd->panel->funcs->get_modes(imxpd->panel);
+ if (!imxpd->bus_format && di->num_bus_formats)
+ imxpd->bus_format = di->bus_formats[0];
if (num_modes > 0)
return num_modes;
}
diff --git a/drivers/gpu/drm/mgag200/mgag200_cursor.c b/drivers/gpu/drm/mgag200/mgag200_cursor.c
index 4f2068fe5d88..a7bf6a90eae5 100644
--- a/drivers/gpu/drm/mgag200/mgag200_cursor.c
+++ b/drivers/gpu/drm/mgag200/mgag200_cursor.c
@@ -70,6 +70,11 @@ int mga_crtc_cursor_set(struct drm_crtc *crtc,
BUG_ON(pixels_2 != pixels_current && pixels_2 != pixels_prev);
BUG_ON(pixels_current == pixels_prev);
+ if (!handle || !file_priv) {
+ mga_hide_cursor(mdev);
+ return 0;
+ }
+
obj = drm_gem_object_lookup(dev, file_priv, handle);
if (!obj)
return -ENOENT;
@@ -88,12 +93,6 @@ int mga_crtc_cursor_set(struct drm_crtc *crtc,
goto out_unreserve1;
}
- if (!handle) {
- mga_hide_cursor(mdev);
- ret = 0;
- goto out1;
- }
-
/* Move cursor buffers into VRAM if they aren't already */
if (!pixels_1->pin_count) {
ret = mgag200_bo_pin(pixels_1, TTM_PL_FLAG_VRAM,
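The motivation for moving the check up: handle the hide request (NULL handle) before any object lookup or pinning, so the hide path needs no unwinding. As a generic pattern, with hypothetical names:

struct cursor_ctx;                      /* hypothetical */
void hide_cursor(struct cursor_ctx *c);
int show_cursor_from_handle(struct cursor_ctx *c, uint32_t handle);

static int cursor_set(struct cursor_ctx *c, uint32_t handle)
{
        if (!handle) {          /* NULL handle means "hide the cursor" */
                hide_cursor(c);
                return 0;       /* nothing acquired, nothing to clean up */
        }
        /* only now look up the GEM object, pin buffers, etc. */
        return show_cursor_from_handle(c, handle);
}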
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
index 8f760002e401..913192c94876 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
@@ -159,7 +159,6 @@ struct nvkm_device_func {
struct nvkm_device_quirk {
u8 tv_pin_mask;
u8 tv_gpio;
- bool War00C800_0;
};
struct nvkm_device_chip {
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h
index 28bc202f9753..40f845e31272 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h
@@ -7,6 +7,7 @@ struct nvkm_instmem {
const struct nvkm_instmem_func *func;
struct nvkm_subdev subdev;
+ spinlock_t lock;
struct list_head list;
u32 reserved;
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 8b8332e46f24..d5e6938cc6bc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -367,6 +367,7 @@ static int nouveau_rom_call(acpi_handle rom_handle, uint8_t *bios,
return -ENODEV;
}
obj = (union acpi_object *)buffer.pointer;
+ len = min(len, (int)obj->buffer.length);
memcpy(bios+offset, obj->buffer.pointer, len);
kfree(buffer.pointer);
return len;
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index db6bc6760545..64c8d932d5f1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -829,7 +829,6 @@ nouveau_finish_page_flip(struct nouveau_channel *chan,
struct drm_device *dev = drm->dev;
struct nouveau_page_flip_state *s;
unsigned long flags;
- int crtcid = -1;
spin_lock_irqsave(&dev->event_lock, flags);
@@ -841,15 +840,19 @@ nouveau_finish_page_flip(struct nouveau_channel *chan,
s = list_first_entry(&fctx->flip, struct nouveau_page_flip_state, head);
if (s->event) {
- /* Vblank timestamps/counts are only correct on >= NV-50 */
- if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA)
- crtcid = s->crtc;
- drm_send_vblank_event(dev, crtcid, s->event);
+ if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
+ drm_arm_vblank_event(dev, s->crtc, s->event);
+ } else {
+ drm_send_vblank_event(dev, s->crtc, s->event);
+ /* Give up ownership of vblank for page-flipped crtc */
+ drm_vblank_put(dev, s->crtc);
+ }
+ }
+ else {
+ /* Give up ownership of vblank for page-flipped crtc */
+ drm_vblank_put(dev, s->crtc);
}
-
- /* Give up ownership of vblank for page-flipped crtc */
- drm_vblank_put(dev, s->crtc);
list_del(&s->head);
if (ps)
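Reduced to its gist (a sketch; names as in the patch): pre-Tesla hardware lacks trustworthy vblank timestamps, so the event is armed to complete at the next real vblank rather than completed immediately with a possibly bogus timestamp.

if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA)
        /* queued; the vblank interrupt delivers it with a sane timestamp */
        drm_arm_vblank_event(dev, s->crtc, s->event);
else
        /* timestamps are reliable here: complete the flip right away */
        drm_send_vblank_event(dev, s->crtc, s->event);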
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.h b/drivers/gpu/drm/nouveau/nouveau_drm.h
index 3050042e6c6d..a02813e994ec 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.h
@@ -39,6 +39,7 @@
#include <nvif/client.h>
#include <nvif/device.h>
+#include <nvif/ioctl.h>
#include <drmP.h>
@@ -65,9 +66,10 @@ struct nouveau_drm_tile {
};
enum nouveau_drm_object_route {
- NVDRM_OBJECT_NVIF = 0,
+ NVDRM_OBJECT_NVIF = NVIF_IOCTL_V0_OWNER_NVIF,
NVDRM_OBJECT_USIF,
NVDRM_OBJECT_ABI16,
+ NVDRM_OBJECT_ANY = NVIF_IOCTL_V0_OWNER_ANY,
};
enum nouveau_drm_notify_route {
diff --git a/drivers/gpu/drm/nouveau/nouveau_usif.c b/drivers/gpu/drm/nouveau/nouveau_usif.c
index 89dc4ce63490..6ae1b3494bcd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_usif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_usif.c
@@ -313,7 +313,10 @@ usif_ioctl(struct drm_file *filp, void __user *user, u32 argc)
if (nvif_unpack(argv->v0, 0, 0, true)) {
/* block access to objects not created via this interface */
owner = argv->v0.owner;
- argv->v0.owner = NVDRM_OBJECT_USIF;
+ if (argv->v0.object == 0ULL)
+ argv->v0.owner = NVDRM_OBJECT_ANY; /* except client */
+ else
+ argv->v0.owner = NVDRM_OBJECT_USIF;
} else
goto done;
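The owner field works as a routing tag: objects are stamped with the interface that created them, and requests whose tag does not match are refused. A minimal sketch of the idea (types and values invented):

enum route { OWNER_NVIF, OWNER_USIF, OWNER_ANY };

struct tagged_obj { enum route owner; };

/* callers may touch objects they created, or ones open to any owner */
static int may_access(const struct tagged_obj *o, enum route caller)
{
        return o->owner == caller || o->owner == OWNER_ANY;
}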
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index e3c783d0e2ab..62ad0300cfa5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -259,12 +259,6 @@ nvkm_device_pci_10de_0df4[] = {
};
static const struct nvkm_device_pci_vendor
-nvkm_device_pci_10de_0fcd[] = {
- { 0x17aa, 0x3801, NULL, { .War00C800_0 = true } }, /* Lenovo Y510P */
- {}
-};
-
-static const struct nvkm_device_pci_vendor
nvkm_device_pci_10de_0fd2[] = {
{ 0x1028, 0x0595, "GeForce GT 640M LE" },
{ 0x1028, 0x05b2, "GeForce GT 640M LE" },
@@ -684,7 +678,6 @@ nvkm_device_pci_10de_1189[] = {
static const struct nvkm_device_pci_vendor
nvkm_device_pci_10de_1199[] = {
{ 0x1458, 0xd001, "GeForce GTX 760" },
- { 0x1462, 0x1106, "GeForce GTX 780M", { .War00C800_0 = true } }, /* Medion Erazer X7827 */
{}
};
@@ -695,14 +688,6 @@ nvkm_device_pci_10de_11e3[] = {
};
static const struct nvkm_device_pci_vendor
-nvkm_device_pci_10de_11fc[] = {
- { 0x1179, 0x0001, NULL, { .War00C800_0 = true } }, /* Toshiba Tecra W50 */
- { 0x17aa, 0x2211, NULL, { .War00C800_0 = true } }, /* Lenovo W541 */
- { 0x17aa, 0x221e, NULL, { .War00C800_0 = true } }, /* Lenovo W541 */
- {}
-};
-
-static const struct nvkm_device_pci_vendor
nvkm_device_pci_10de_1247[] = {
{ 0x1043, 0x212a, "GeForce GT 635M" },
{ 0x1043, 0x212b, "GeForce GT 635M" },
@@ -1356,7 +1341,7 @@ nvkm_device_pci_10de[] = {
{ 0x0fc6, "GeForce GTX 650" },
{ 0x0fc8, "GeForce GT 740" },
{ 0x0fc9, "GeForce GT 730" },
- { 0x0fcd, "GeForce GT 755M", nvkm_device_pci_10de_0fcd },
+ { 0x0fcd, "GeForce GT 755M" },
{ 0x0fce, "GeForce GT 640M LE" },
{ 0x0fd1, "GeForce GT 650M" },
{ 0x0fd2, "GeForce GT 640M", nvkm_device_pci_10de_0fd2 },
@@ -1490,7 +1475,7 @@ nvkm_device_pci_10de[] = {
{ 0x11e2, "GeForce GTX 765M" },
{ 0x11e3, "GeForce GTX 760M", nvkm_device_pci_10de_11e3 },
{ 0x11fa, "Quadro K4000" },
- { 0x11fc, "Quadro K2100M", nvkm_device_pci_10de_11fc },
+ { 0x11fc, "Quadro K2100M" },
{ 0x1200, "GeForce GTX 560 Ti" },
{ 0x1201, "GeForce GTX 560" },
{ 0x1203, "GeForce GTX 460 SE v2" },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
index b5b875928aba..74de7a96c22a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
@@ -207,6 +207,8 @@ gf117_grctx_generate_attrib(struct gf100_grctx *info)
const u32 b = beta * gr->ppc_tpc_nr[gpc][ppc];
const u32 t = timeslice_mode;
const u32 o = PPC_UNIT(gpc, ppc, 0);
+ if (!(gr->ppc_mask[gpc] & (1 << ppc)))
+ continue;
mmio_skip(info, o + 0xc0, (t << 28) | (b << 16) | ++bo);
mmio_wr32(info, o + 0xc0, (t << 28) | (b << 16) | --bo);
bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpc.fuc b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpc.fuc
index 194afe910d21..7dacb3cc0668 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpc.fuc
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpc.fuc
@@ -52,10 +52,12 @@ mmio_list_base:
#endif
#ifdef INCLUDE_CODE
+#define gpc_addr(reg,addr) /*
+*/ imm32(reg,addr) /*
+*/ or reg NV_PGRAPH_GPCX_GPCCS_MMIO_CTRL_BASE_ENABLE
#define gpc_wr32(addr,reg) /*
+*/ gpc_addr($r14,addr) /*
*/ mov b32 $r15 reg /*
-*/ imm32($r14, addr) /*
-*/ or $r14 NV_PGRAPH_GPCX_GPCCS_MMIO_CTRL_BASE_ENABLE /*
*/ call(nv_wr32)
// reports an exception to the host
@@ -161,7 +163,7 @@ init:
#if NV_PGRAPH_GPCX_UNK__SIZE > 0
// figure out which, and how many, UNKs are actually present
- imm32($r14, 0x500c30)
+ gpc_addr($r14, 0x500c30)
clear b32 $r2
clear b32 $r3
clear b32 $r4
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h
index 64d07df4b8b1..bb820ff28621 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h
@@ -314,7 +314,7 @@ uint32_t gf117_grgpc_code[] = {
0x03f01200,
0x0002d000,
0x17f104bd,
- 0x10fe0542,
+ 0x10fe0545,
0x0007f100,
0x0003f007,
0xbd0000d0,
@@ -338,184 +338,184 @@ uint32_t gf117_grgpc_code[] = {
0x02d00103,
0xf104bd00,
0xf00c30e7,
- 0x24bd50e3,
- 0x44bd34bd,
-/* 0x0430: init_unk_loop */
- 0xb06821f4,
- 0x0bf400f6,
- 0x01f7f00f,
- 0xfd04f2bb,
- 0x30b6054f,
-/* 0x0445: init_unk_next */
- 0x0120b601,
- 0xb004e0b6,
- 0x1bf40126,
-/* 0x0451: init_unk_done */
- 0x070380e2,
- 0xf1080480,
- 0xf0010027,
- 0x22cf0223,
- 0x9534bd00,
- 0x07f10825,
- 0x03f0c000,
- 0x0005d001,
- 0x07f104bd,
- 0x03f0c100,
- 0x0005d001,
- 0x0e9804bd,
- 0x010f9800,
- 0x015021f5,
- 0xbb002fbb,
- 0x0e98003f,
- 0x020f9801,
- 0x015021f5,
- 0xfd050e98,
- 0x2ebb00ef,
- 0x003ebb00,
- 0x98020e98,
- 0x21f5030f,
- 0x0e980150,
- 0x00effd07,
- 0xbb002ebb,
- 0x35b6003e,
- 0x0007f102,
- 0x0103f0d3,
- 0xbd0003d0,
- 0x0825b604,
- 0xb60635b6,
- 0x30b60120,
- 0x0824b601,
- 0xb90834b6,
- 0x21f5022f,
- 0x2fbb02d3,
- 0x003fbb00,
- 0x010007f1,
- 0xd00203f0,
+ 0xe5f050e3,
+ 0xbd24bd01,
+/* 0x0433: init_unk_loop */
+ 0xf444bd34,
+ 0xf6b06821,
+ 0x0f0bf400,
+ 0xbb01f7f0,
+ 0x4ffd04f2,
+ 0x0130b605,
+/* 0x0448: init_unk_next */
+ 0xb60120b6,
+ 0x26b004e0,
+ 0xe21bf401,
+/* 0x0454: init_unk_done */
+ 0x80070380,
+ 0x27f10804,
+ 0x23f00100,
+ 0x0022cf02,
+ 0x259534bd,
+ 0x0007f108,
+ 0x0103f0c0,
+ 0xbd0005d0,
+ 0x0007f104,
+ 0x0103f0c1,
+ 0xbd0005d0,
+ 0x000e9804,
+ 0xf5010f98,
+ 0xbb015021,
+ 0x3fbb002f,
+ 0x010e9800,
+ 0xf5020f98,
+ 0x98015021,
+ 0xeffd050e,
+ 0x002ebb00,
+ 0x98003ebb,
+ 0x0f98020e,
+ 0x5021f503,
+ 0x070e9801,
+ 0xbb00effd,
+ 0x3ebb002e,
+ 0x0235b600,
+ 0xd30007f1,
+ 0xd00103f0,
0x04bd0003,
- 0x29f024bd,
- 0x0007f11f,
- 0x0203f008,
- 0xbd0002d0,
-/* 0x0505: main */
- 0x0031f404,
- 0xf00028f4,
- 0x21f424d7,
- 0xf401f439,
- 0xf404e4b0,
- 0x81fe1e18,
- 0x0627f001,
- 0x12fd20bd,
- 0x01e4b604,
- 0xfe051efd,
- 0x21f50018,
- 0x0ef405fa,
-/* 0x0535: main_not_ctx_xfer */
- 0x10ef94d3,
- 0xf501f5f0,
- 0xf4037e21,
-/* 0x0542: ih */
- 0x80f9c60e,
- 0xf90188fe,
- 0xf990f980,
- 0xf9b0f9a0,
- 0xf9e0f9d0,
- 0xf104bdf0,
- 0xf00200a7,
- 0xaacf00a3,
- 0x04abc400,
- 0xf02c0bf4,
- 0xe7f124d7,
- 0xe3f01a00,
- 0x00eecf00,
- 0x1900f7f1,
- 0xcf00f3f0,
- 0x21f400ff,
- 0x01e7f004,
- 0x1d0007f1,
- 0xd00003f0,
- 0x04bd000e,
-/* 0x0590: ih_no_fifo */
- 0x010007f1,
- 0xd00003f0,
- 0x04bd000a,
- 0xe0fcf0fc,
- 0xb0fcd0fc,
- 0x90fca0fc,
- 0x88fe80fc,
- 0xf480fc00,
- 0x01f80032,
-/* 0x05b4: hub_barrier_done */
- 0x9801f7f0,
- 0xfebb040e,
- 0x02ffb904,
- 0x9418e7f1,
- 0xf440e3f0,
- 0x00f89d21,
-/* 0x05cc: ctx_redswitch */
- 0xf120f7f0,
+ 0xb60825b6,
+ 0x20b60635,
+ 0x0130b601,
+ 0xb60824b6,
+ 0x2fb90834,
+ 0xd321f502,
+ 0x002fbb02,
+ 0xf1003fbb,
+ 0xf0010007,
+ 0x03d00203,
+ 0xbd04bd00,
+ 0x1f29f024,
+ 0x080007f1,
+ 0xd00203f0,
+ 0x04bd0002,
+/* 0x0508: main */
+ 0xf40031f4,
+ 0xd7f00028,
+ 0x3921f424,
+ 0xb0f401f4,
+ 0x18f404e4,
+ 0x0181fe1e,
+ 0xbd0627f0,
+ 0x0412fd20,
+ 0xfd01e4b6,
+ 0x18fe051e,
+ 0xfd21f500,
+ 0xd30ef405,
+/* 0x0538: main_not_ctx_xfer */
+ 0xf010ef94,
+ 0x21f501f5,
+ 0x0ef4037e,
+/* 0x0545: ih */
+ 0xfe80f9c6,
+ 0x80f90188,
+ 0xa0f990f9,
+ 0xd0f9b0f9,
+ 0xf0f9e0f9,
+ 0xa7f104bd,
+ 0xa3f00200,
+ 0x00aacf00,
+ 0xf404abc4,
+ 0xd7f02c0b,
+ 0x00e7f124,
+ 0x00e3f01a,
+ 0xf100eecf,
+ 0xf01900f7,
+ 0xffcf00f3,
+ 0x0421f400,
+ 0xf101e7f0,
+ 0xf01d0007,
+ 0x0ed00003,
+/* 0x0593: ih_no_fifo */
+ 0xf104bd00,
+ 0xf0010007,
+ 0x0ad00003,
+ 0xfc04bd00,
+ 0xfce0fcf0,
+ 0xfcb0fcd0,
+ 0xfc90fca0,
+ 0x0088fe80,
+ 0x32f480fc,
+/* 0x05b7: hub_barrier_done */
+ 0xf001f800,
+ 0x0e9801f7,
+ 0x04febb04,
+ 0xf102ffb9,
+ 0xf09418e7,
+ 0x21f440e3,
+/* 0x05cf: ctx_redswitch */
+ 0xf000f89d,
+ 0x07f120f7,
+ 0x03f08500,
+ 0x000fd001,
+ 0xe7f004bd,
+/* 0x05e1: ctx_redswitch_delay */
+ 0x01e2b608,
+ 0xf1fd1bf4,
+ 0xf10800f5,
+ 0xf10200f5,
0xf0850007,
0x0fd00103,
- 0xf004bd00,
-/* 0x05de: ctx_redswitch_delay */
- 0xe2b608e7,
- 0xfd1bf401,
- 0x0800f5f1,
- 0x0200f5f1,
- 0x850007f1,
- 0xd00103f0,
- 0x04bd000f,
-/* 0x05fa: ctx_xfer */
- 0x07f100f8,
- 0x03f08100,
- 0x000fd002,
- 0x11f404bd,
- 0xcc21f507,
-/* 0x060d: ctx_xfer_not_load */
- 0x6a21f505,
- 0xf124bd02,
- 0xf047fc07,
- 0x02d00203,
- 0xf004bd00,
- 0x20b6012c,
- 0xfc07f103,
- 0x0203f04a,
- 0xbd0002d0,
- 0x01acf004,
- 0xf102a5f0,
- 0xf00000b7,
- 0x0c9850b3,
- 0x0fc4b604,
- 0x9800bcbb,
- 0x0d98000c,
- 0x00e7f001,
- 0x016f21f5,
- 0xf101acf0,
- 0xf04000b7,
- 0x0c9850b3,
- 0x0fc4b604,
- 0x9800bcbb,
- 0x0d98010c,
- 0x060f9802,
- 0x0800e7f1,
- 0x016f21f5,
+ 0xf804bd00,
+/* 0x05fd: ctx_xfer */
+ 0x0007f100,
+ 0x0203f081,
+ 0xbd000fd0,
+ 0x0711f404,
+ 0x05cf21f5,
+/* 0x0610: ctx_xfer_not_load */
+ 0x026a21f5,
+ 0x07f124bd,
+ 0x03f047fc,
+ 0x0002d002,
+ 0x2cf004bd,
+ 0x0320b601,
+ 0x4afc07f1,
+ 0xd00203f0,
+ 0x04bd0002,
0xf001acf0,
- 0xb7f104a5,
- 0xb3f03000,
+ 0xb7f102a5,
+ 0xb3f00000,
0x040c9850,
0xbb0fc4b6,
0x0c9800bc,
- 0x030d9802,
- 0xf1080f98,
- 0xf50200e7,
- 0xf5016f21,
- 0xf4025e21,
- 0x12f40601,
-/* 0x06a9: ctx_xfer_post */
- 0x7f21f507,
-/* 0x06ad: ctx_xfer_done */
- 0xb421f502,
- 0x0000f805,
- 0x00000000,
+ 0x010d9800,
+ 0xf500e7f0,
+ 0xf0016f21,
+ 0xb7f101ac,
+ 0xb3f04000,
+ 0x040c9850,
+ 0xbb0fc4b6,
+ 0x0c9800bc,
+ 0x020d9801,
+ 0xf1060f98,
+ 0xf50800e7,
+ 0xf0016f21,
+ 0xa5f001ac,
+ 0x00b7f104,
+ 0x50b3f030,
+ 0xb6040c98,
+ 0xbcbb0fc4,
+ 0x020c9800,
+ 0x98030d98,
+ 0xe7f1080f,
+ 0x21f50200,
+ 0x21f5016f,
+ 0x01f4025e,
+ 0x0712f406,
+/* 0x06ac: ctx_xfer_post */
+ 0x027f21f5,
+/* 0x06b0: ctx_xfer_done */
+ 0x05b721f5,
+ 0x000000f8,
0x00000000,
0x00000000,
0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h
index 2f596433c222..911976d20940 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h
@@ -314,7 +314,7 @@ uint32_t gk104_grgpc_code[] = {
0x03f01200,
0x0002d000,
0x17f104bd,
- 0x10fe0542,
+ 0x10fe0545,
0x0007f100,
0x0003f007,
0xbd0000d0,
@@ -338,184 +338,184 @@ uint32_t gk104_grgpc_code[] = {
0x02d00103,
0xf104bd00,
0xf00c30e7,
- 0x24bd50e3,
- 0x44bd34bd,
-/* 0x0430: init_unk_loop */
- 0xb06821f4,
- 0x0bf400f6,
- 0x01f7f00f,
- 0xfd04f2bb,
- 0x30b6054f,
-/* 0x0445: init_unk_next */
- 0x0120b601,
- 0xb004e0b6,
- 0x1bf40126,
-/* 0x0451: init_unk_done */
- 0x070380e2,
- 0xf1080480,
- 0xf0010027,
- 0x22cf0223,
- 0x9534bd00,
- 0x07f10825,
- 0x03f0c000,
- 0x0005d001,
- 0x07f104bd,
- 0x03f0c100,
- 0x0005d001,
- 0x0e9804bd,
- 0x010f9800,
- 0x015021f5,
- 0xbb002fbb,
- 0x0e98003f,
- 0x020f9801,
- 0x015021f5,
- 0xfd050e98,
- 0x2ebb00ef,
- 0x003ebb00,
- 0x98020e98,
- 0x21f5030f,
- 0x0e980150,
- 0x00effd07,
- 0xbb002ebb,
- 0x35b6003e,
- 0x0007f102,
- 0x0103f0d3,
- 0xbd0003d0,
- 0x0825b604,
- 0xb60635b6,
- 0x30b60120,
- 0x0824b601,
- 0xb90834b6,
- 0x21f5022f,
- 0x2fbb02d3,
- 0x003fbb00,
- 0x010007f1,
- 0xd00203f0,
+ 0xe5f050e3,
+ 0xbd24bd01,
+/* 0x0433: init_unk_loop */
+ 0xf444bd34,
+ 0xf6b06821,
+ 0x0f0bf400,
+ 0xbb01f7f0,
+ 0x4ffd04f2,
+ 0x0130b605,
+/* 0x0448: init_unk_next */
+ 0xb60120b6,
+ 0x26b004e0,
+ 0xe21bf401,
+/* 0x0454: init_unk_done */
+ 0x80070380,
+ 0x27f10804,
+ 0x23f00100,
+ 0x0022cf02,
+ 0x259534bd,
+ 0x0007f108,
+ 0x0103f0c0,
+ 0xbd0005d0,
+ 0x0007f104,
+ 0x0103f0c1,
+ 0xbd0005d0,
+ 0x000e9804,
+ 0xf5010f98,
+ 0xbb015021,
+ 0x3fbb002f,
+ 0x010e9800,
+ 0xf5020f98,
+ 0x98015021,
+ 0xeffd050e,
+ 0x002ebb00,
+ 0x98003ebb,
+ 0x0f98020e,
+ 0x5021f503,
+ 0x070e9801,
+ 0xbb00effd,
+ 0x3ebb002e,
+ 0x0235b600,
+ 0xd30007f1,
+ 0xd00103f0,
0x04bd0003,
- 0x29f024bd,
- 0x0007f11f,
- 0x0203f008,
- 0xbd0002d0,
-/* 0x0505: main */
- 0x0031f404,
- 0xf00028f4,
- 0x21f424d7,
- 0xf401f439,
- 0xf404e4b0,
- 0x81fe1e18,
- 0x0627f001,
- 0x12fd20bd,
- 0x01e4b604,
- 0xfe051efd,
- 0x21f50018,
- 0x0ef405fa,
-/* 0x0535: main_not_ctx_xfer */
- 0x10ef94d3,
- 0xf501f5f0,
- 0xf4037e21,
-/* 0x0542: ih */
- 0x80f9c60e,
- 0xf90188fe,
- 0xf990f980,
- 0xf9b0f9a0,
- 0xf9e0f9d0,
- 0xf104bdf0,
- 0xf00200a7,
- 0xaacf00a3,
- 0x04abc400,
- 0xf02c0bf4,
- 0xe7f124d7,
- 0xe3f01a00,
- 0x00eecf00,
- 0x1900f7f1,
- 0xcf00f3f0,
- 0x21f400ff,
- 0x01e7f004,
- 0x1d0007f1,
- 0xd00003f0,
- 0x04bd000e,
-/* 0x0590: ih_no_fifo */
- 0x010007f1,
- 0xd00003f0,
- 0x04bd000a,
- 0xe0fcf0fc,
- 0xb0fcd0fc,
- 0x90fca0fc,
- 0x88fe80fc,
- 0xf480fc00,
- 0x01f80032,
-/* 0x05b4: hub_barrier_done */
- 0x9801f7f0,
- 0xfebb040e,
- 0x02ffb904,
- 0x9418e7f1,
- 0xf440e3f0,
- 0x00f89d21,
-/* 0x05cc: ctx_redswitch */
- 0xf120f7f0,
+ 0xb60825b6,
+ 0x20b60635,
+ 0x0130b601,
+ 0xb60824b6,
+ 0x2fb90834,
+ 0xd321f502,
+ 0x002fbb02,
+ 0xf1003fbb,
+ 0xf0010007,
+ 0x03d00203,
+ 0xbd04bd00,
+ 0x1f29f024,
+ 0x080007f1,
+ 0xd00203f0,
+ 0x04bd0002,
+/* 0x0508: main */
+ 0xf40031f4,
+ 0xd7f00028,
+ 0x3921f424,
+ 0xb0f401f4,
+ 0x18f404e4,
+ 0x0181fe1e,
+ 0xbd0627f0,
+ 0x0412fd20,
+ 0xfd01e4b6,
+ 0x18fe051e,
+ 0xfd21f500,
+ 0xd30ef405,
+/* 0x0538: main_not_ctx_xfer */
+ 0xf010ef94,
+ 0x21f501f5,
+ 0x0ef4037e,
+/* 0x0545: ih */
+ 0xfe80f9c6,
+ 0x80f90188,
+ 0xa0f990f9,
+ 0xd0f9b0f9,
+ 0xf0f9e0f9,
+ 0xa7f104bd,
+ 0xa3f00200,
+ 0x00aacf00,
+ 0xf404abc4,
+ 0xd7f02c0b,
+ 0x00e7f124,
+ 0x00e3f01a,
+ 0xf100eecf,
+ 0xf01900f7,
+ 0xffcf00f3,
+ 0x0421f400,
+ 0xf101e7f0,
+ 0xf01d0007,
+ 0x0ed00003,
+/* 0x0593: ih_no_fifo */
+ 0xf104bd00,
+ 0xf0010007,
+ 0x0ad00003,
+ 0xfc04bd00,
+ 0xfce0fcf0,
+ 0xfcb0fcd0,
+ 0xfc90fca0,
+ 0x0088fe80,
+ 0x32f480fc,
+/* 0x05b7: hub_barrier_done */
+ 0xf001f800,
+ 0x0e9801f7,
+ 0x04febb04,
+ 0xf102ffb9,
+ 0xf09418e7,
+ 0x21f440e3,
+/* 0x05cf: ctx_redswitch */
+ 0xf000f89d,
+ 0x07f120f7,
+ 0x03f08500,
+ 0x000fd001,
+ 0xe7f004bd,
+/* 0x05e1: ctx_redswitch_delay */
+ 0x01e2b608,
+ 0xf1fd1bf4,
+ 0xf10800f5,
+ 0xf10200f5,
0xf0850007,
0x0fd00103,
- 0xf004bd00,
-/* 0x05de: ctx_redswitch_delay */
- 0xe2b608e7,
- 0xfd1bf401,
- 0x0800f5f1,
- 0x0200f5f1,
- 0x850007f1,
- 0xd00103f0,
- 0x04bd000f,
-/* 0x05fa: ctx_xfer */
- 0x07f100f8,
- 0x03f08100,
- 0x000fd002,
- 0x11f404bd,
- 0xcc21f507,
-/* 0x060d: ctx_xfer_not_load */
- 0x6a21f505,
- 0xf124bd02,
- 0xf047fc07,
- 0x02d00203,
- 0xf004bd00,
- 0x20b6012c,
- 0xfc07f103,
- 0x0203f04a,
- 0xbd0002d0,
- 0x01acf004,
- 0xf102a5f0,
- 0xf00000b7,
- 0x0c9850b3,
- 0x0fc4b604,
- 0x9800bcbb,
- 0x0d98000c,
- 0x00e7f001,
- 0x016f21f5,
- 0xf101acf0,
- 0xf04000b7,
- 0x0c9850b3,
- 0x0fc4b604,
- 0x9800bcbb,
- 0x0d98010c,
- 0x060f9802,
- 0x0800e7f1,
- 0x016f21f5,
+ 0xf804bd00,
+/* 0x05fd: ctx_xfer */
+ 0x0007f100,
+ 0x0203f081,
+ 0xbd000fd0,
+ 0x0711f404,
+ 0x05cf21f5,
+/* 0x0610: ctx_xfer_not_load */
+ 0x026a21f5,
+ 0x07f124bd,
+ 0x03f047fc,
+ 0x0002d002,
+ 0x2cf004bd,
+ 0x0320b601,
+ 0x4afc07f1,
+ 0xd00203f0,
+ 0x04bd0002,
0xf001acf0,
- 0xb7f104a5,
- 0xb3f03000,
+ 0xb7f102a5,
+ 0xb3f00000,
0x040c9850,
0xbb0fc4b6,
0x0c9800bc,
- 0x030d9802,
- 0xf1080f98,
- 0xf50200e7,
- 0xf5016f21,
- 0xf4025e21,
- 0x12f40601,
-/* 0x06a9: ctx_xfer_post */
- 0x7f21f507,
-/* 0x06ad: ctx_xfer_done */
- 0xb421f502,
- 0x0000f805,
- 0x00000000,
+ 0x010d9800,
+ 0xf500e7f0,
+ 0xf0016f21,
+ 0xb7f101ac,
+ 0xb3f04000,
+ 0x040c9850,
+ 0xbb0fc4b6,
+ 0x0c9800bc,
+ 0x020d9801,
+ 0xf1060f98,
+ 0xf50800e7,
+ 0xf0016f21,
+ 0xa5f001ac,
+ 0x00b7f104,
+ 0x50b3f030,
+ 0xb6040c98,
+ 0xbcbb0fc4,
+ 0x020c9800,
+ 0x98030d98,
+ 0xe7f1080f,
+ 0x21f50200,
+ 0x21f5016f,
+ 0x01f4025e,
+ 0x0712f406,
+/* 0x06ac: ctx_xfer_post */
+ 0x027f21f5,
+/* 0x06b0: ctx_xfer_done */
+ 0x05b721f5,
+ 0x000000f8,
0x00000000,
0x00000000,
0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h
index ee8e54db8fc9..1c6e11b05df2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h
@@ -314,7 +314,7 @@ uint32_t gk110_grgpc_code[] = {
0x03f01200,
0x0002d000,
0x17f104bd,
- 0x10fe0542,
+ 0x10fe0545,
0x0007f100,
0x0003f007,
0xbd0000d0,
@@ -338,184 +338,184 @@ uint32_t gk110_grgpc_code[] = {
0x02d00103,
0xf104bd00,
0xf00c30e7,
- 0x24bd50e3,
- 0x44bd34bd,
-/* 0x0430: init_unk_loop */
- 0xb06821f4,
- 0x0bf400f6,
- 0x01f7f00f,
- 0xfd04f2bb,
- 0x30b6054f,
-/* 0x0445: init_unk_next */
- 0x0120b601,
- 0xb004e0b6,
- 0x1bf40226,
-/* 0x0451: init_unk_done */
- 0x070380e2,
- 0xf1080480,
- 0xf0010027,
- 0x22cf0223,
- 0x9534bd00,
- 0x07f10825,
- 0x03f0c000,
- 0x0005d001,
- 0x07f104bd,
- 0x03f0c100,
- 0x0005d001,
- 0x0e9804bd,
- 0x010f9800,
- 0x015021f5,
- 0xbb002fbb,
- 0x0e98003f,
- 0x020f9801,
- 0x015021f5,
- 0xfd050e98,
- 0x2ebb00ef,
- 0x003ebb00,
- 0x98020e98,
- 0x21f5030f,
- 0x0e980150,
- 0x00effd07,
- 0xbb002ebb,
- 0x35b6003e,
- 0x0007f102,
- 0x0103f0d3,
- 0xbd0003d0,
- 0x0825b604,
- 0xb60635b6,
- 0x30b60120,
- 0x0824b601,
- 0xb90834b6,
- 0x21f5022f,
- 0x2fbb02d3,
- 0x003fbb00,
- 0x010007f1,
- 0xd00203f0,
+ 0xe5f050e3,
+ 0xbd24bd01,
+/* 0x0433: init_unk_loop */
+ 0xf444bd34,
+ 0xf6b06821,
+ 0x0f0bf400,
+ 0xbb01f7f0,
+ 0x4ffd04f2,
+ 0x0130b605,
+/* 0x0448: init_unk_next */
+ 0xb60120b6,
+ 0x26b004e0,
+ 0xe21bf402,
+/* 0x0454: init_unk_done */
+ 0x80070380,
+ 0x27f10804,
+ 0x23f00100,
+ 0x0022cf02,
+ 0x259534bd,
+ 0x0007f108,
+ 0x0103f0c0,
+ 0xbd0005d0,
+ 0x0007f104,
+ 0x0103f0c1,
+ 0xbd0005d0,
+ 0x000e9804,
+ 0xf5010f98,
+ 0xbb015021,
+ 0x3fbb002f,
+ 0x010e9800,
+ 0xf5020f98,
+ 0x98015021,
+ 0xeffd050e,
+ 0x002ebb00,
+ 0x98003ebb,
+ 0x0f98020e,
+ 0x5021f503,
+ 0x070e9801,
+ 0xbb00effd,
+ 0x3ebb002e,
+ 0x0235b600,
+ 0xd30007f1,
+ 0xd00103f0,
0x04bd0003,
- 0x29f024bd,
- 0x0007f11f,
- 0x0203f030,
- 0xbd0002d0,
-/* 0x0505: main */
- 0x0031f404,
- 0xf00028f4,
- 0x21f424d7,
- 0xf401f439,
- 0xf404e4b0,
- 0x81fe1e18,
- 0x0627f001,
- 0x12fd20bd,
- 0x01e4b604,
- 0xfe051efd,
- 0x21f50018,
- 0x0ef405fa,
-/* 0x0535: main_not_ctx_xfer */
- 0x10ef94d3,
- 0xf501f5f0,
- 0xf4037e21,
-/* 0x0542: ih */
- 0x80f9c60e,
- 0xf90188fe,
- 0xf990f980,
- 0xf9b0f9a0,
- 0xf9e0f9d0,
- 0xf104bdf0,
- 0xf00200a7,
- 0xaacf00a3,
- 0x04abc400,
- 0xf02c0bf4,
- 0xe7f124d7,
- 0xe3f01a00,
- 0x00eecf00,
- 0x1900f7f1,
- 0xcf00f3f0,
- 0x21f400ff,
- 0x01e7f004,
- 0x1d0007f1,
- 0xd00003f0,
- 0x04bd000e,
-/* 0x0590: ih_no_fifo */
- 0x010007f1,
- 0xd00003f0,
- 0x04bd000a,
- 0xe0fcf0fc,
- 0xb0fcd0fc,
- 0x90fca0fc,
- 0x88fe80fc,
- 0xf480fc00,
- 0x01f80032,
-/* 0x05b4: hub_barrier_done */
- 0x9801f7f0,
- 0xfebb040e,
- 0x02ffb904,
- 0x9418e7f1,
- 0xf440e3f0,
- 0x00f89d21,
-/* 0x05cc: ctx_redswitch */
- 0xf120f7f0,
+ 0xb60825b6,
+ 0x20b60635,
+ 0x0130b601,
+ 0xb60824b6,
+ 0x2fb90834,
+ 0xd321f502,
+ 0x002fbb02,
+ 0xf1003fbb,
+ 0xf0010007,
+ 0x03d00203,
+ 0xbd04bd00,
+ 0x1f29f024,
+ 0x300007f1,
+ 0xd00203f0,
+ 0x04bd0002,
+/* 0x0508: main */
+ 0xf40031f4,
+ 0xd7f00028,
+ 0x3921f424,
+ 0xb0f401f4,
+ 0x18f404e4,
+ 0x0181fe1e,
+ 0xbd0627f0,
+ 0x0412fd20,
+ 0xfd01e4b6,
+ 0x18fe051e,
+ 0xfd21f500,
+ 0xd30ef405,
+/* 0x0538: main_not_ctx_xfer */
+ 0xf010ef94,
+ 0x21f501f5,
+ 0x0ef4037e,
+/* 0x0545: ih */
+ 0xfe80f9c6,
+ 0x80f90188,
+ 0xa0f990f9,
+ 0xd0f9b0f9,
+ 0xf0f9e0f9,
+ 0xa7f104bd,
+ 0xa3f00200,
+ 0x00aacf00,
+ 0xf404abc4,
+ 0xd7f02c0b,
+ 0x00e7f124,
+ 0x00e3f01a,
+ 0xf100eecf,
+ 0xf01900f7,
+ 0xffcf00f3,
+ 0x0421f400,
+ 0xf101e7f0,
+ 0xf01d0007,
+ 0x0ed00003,
+/* 0x0593: ih_no_fifo */
+ 0xf104bd00,
+ 0xf0010007,
+ 0x0ad00003,
+ 0xfc04bd00,
+ 0xfce0fcf0,
+ 0xfcb0fcd0,
+ 0xfc90fca0,
+ 0x0088fe80,
+ 0x32f480fc,
+/* 0x05b7: hub_barrier_done */
+ 0xf001f800,
+ 0x0e9801f7,
+ 0x04febb04,
+ 0xf102ffb9,
+ 0xf09418e7,
+ 0x21f440e3,
+/* 0x05cf: ctx_redswitch */
+ 0xf000f89d,
+ 0x07f120f7,
+ 0x03f08500,
+ 0x000fd001,
+ 0xe7f004bd,
+/* 0x05e1: ctx_redswitch_delay */
+ 0x01e2b608,
+ 0xf1fd1bf4,
+ 0xf10800f5,
+ 0xf10200f5,
0xf0850007,
0x0fd00103,
- 0xf004bd00,
-/* 0x05de: ctx_redswitch_delay */
- 0xe2b608e7,
- 0xfd1bf401,
- 0x0800f5f1,
- 0x0200f5f1,
- 0x850007f1,
- 0xd00103f0,
- 0x04bd000f,
-/* 0x05fa: ctx_xfer */
- 0x07f100f8,
- 0x03f08100,
- 0x000fd002,
- 0x11f404bd,
- 0xcc21f507,
-/* 0x060d: ctx_xfer_not_load */
- 0x6a21f505,
- 0xf124bd02,
- 0xf047fc07,
- 0x02d00203,
- 0xf004bd00,
- 0x20b6012c,
- 0xfc07f103,
- 0x0203f04a,
- 0xbd0002d0,
- 0x01acf004,
- 0xf102a5f0,
- 0xf00000b7,
- 0x0c9850b3,
- 0x0fc4b604,
- 0x9800bcbb,
- 0x0d98000c,
- 0x00e7f001,
- 0x016f21f5,
- 0xf101acf0,
- 0xf04000b7,
- 0x0c9850b3,
- 0x0fc4b604,
- 0x9800bcbb,
- 0x0d98010c,
- 0x060f9802,
- 0x0800e7f1,
- 0x016f21f5,
+ 0xf804bd00,
+/* 0x05fd: ctx_xfer */
+ 0x0007f100,
+ 0x0203f081,
+ 0xbd000fd0,
+ 0x0711f404,
+ 0x05cf21f5,
+/* 0x0610: ctx_xfer_not_load */
+ 0x026a21f5,
+ 0x07f124bd,
+ 0x03f047fc,
+ 0x0002d002,
+ 0x2cf004bd,
+ 0x0320b601,
+ 0x4afc07f1,
+ 0xd00203f0,
+ 0x04bd0002,
0xf001acf0,
- 0xb7f104a5,
- 0xb3f03000,
+ 0xb7f102a5,
+ 0xb3f00000,
0x040c9850,
0xbb0fc4b6,
0x0c9800bc,
- 0x030d9802,
- 0xf1080f98,
- 0xf50200e7,
- 0xf5016f21,
- 0xf4025e21,
- 0x12f40601,
-/* 0x06a9: ctx_xfer_post */
- 0x7f21f507,
-/* 0x06ad: ctx_xfer_done */
- 0xb421f502,
- 0x0000f805,
- 0x00000000,
+ 0x010d9800,
+ 0xf500e7f0,
+ 0xf0016f21,
+ 0xb7f101ac,
+ 0xb3f04000,
+ 0x040c9850,
+ 0xbb0fc4b6,
+ 0x0c9800bc,
+ 0x020d9801,
+ 0xf1060f98,
+ 0xf50800e7,
+ 0xf0016f21,
+ 0xa5f001ac,
+ 0x00b7f104,
+ 0x50b3f030,
+ 0xb6040c98,
+ 0xbcbb0fc4,
+ 0x020c9800,
+ 0x98030d98,
+ 0xe7f1080f,
+ 0x21f50200,
+ 0x21f5016f,
+ 0x01f4025e,
+ 0x0712f406,
+/* 0x06ac: ctx_xfer_post */
+ 0x027f21f5,
+/* 0x06b0: ctx_xfer_done */
+ 0x05b721f5,
+ 0x000000f8,
0x00000000,
0x00000000,
0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h
index fbcc342f896f..84af7ec6a78e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h
@@ -276,7 +276,7 @@ uint32_t gk208_grgpc_code[] = {
0x02020014,
0xf6120040,
0x04bd0002,
- 0xfe048141,
+ 0xfe048441,
0x00400010,
0x0000f607,
0x040204bd,
@@ -295,165 +295,165 @@ uint32_t gk208_grgpc_code[] = {
0x01c90080,
0xbd0002f6,
0x0c308e04,
- 0xbd24bd50,
-/* 0x0383: init_unk_loop */
- 0x7e44bd34,
- 0xb0000065,
- 0x0bf400f6,
- 0xbb010f0e,
- 0x4ffd04f2,
- 0x0130b605,
-/* 0x0398: init_unk_next */
- 0xb60120b6,
- 0x26b004e0,
- 0xe21bf401,
-/* 0x03a4: init_unk_done */
- 0xb50703b5,
- 0x00820804,
- 0x22cf0201,
- 0x9534bd00,
- 0x00800825,
- 0x05f601c0,
- 0x8004bd00,
- 0xf601c100,
- 0x04bd0005,
- 0x98000e98,
- 0x207e010f,
- 0x2fbb0001,
- 0x003fbb00,
- 0x98010e98,
- 0x207e020f,
- 0x0e980001,
- 0x00effd05,
- 0xbb002ebb,
- 0x0e98003e,
- 0x030f9802,
- 0x0001207e,
- 0xfd070e98,
- 0x2ebb00ef,
- 0x003ebb00,
- 0x800235b6,
- 0xf601d300,
- 0x04bd0003,
- 0xb60825b6,
- 0x20b60635,
- 0x0130b601,
- 0xb60824b6,
- 0x2fb20834,
- 0x0002687e,
- 0xbb002fbb,
- 0x0080003f,
- 0x03f60201,
- 0xbd04bd00,
- 0x1f29f024,
- 0x02300080,
- 0xbd0002f6,
-/* 0x0445: main */
- 0x0031f404,
- 0x0d0028f4,
- 0x00377e24,
- 0xf401f400,
- 0xf404e4b0,
- 0x81fe1d18,
- 0xbd060201,
- 0x0412fd20,
- 0xfd01e4b6,
- 0x18fe051e,
- 0x05187e00,
- 0xd40ef400,
-/* 0x0474: main_not_ctx_xfer */
- 0xf010ef94,
- 0xf87e01f5,
- 0x0ef40002,
-/* 0x0481: ih */
- 0xfe80f9c7,
- 0x80f90188,
- 0xa0f990f9,
- 0xd0f9b0f9,
- 0xf0f9e0f9,
- 0x004a04bd,
- 0x00aacf02,
- 0xf404abc4,
- 0x240d1f0b,
- 0xcf1a004e,
- 0x004f00ee,
- 0x00ffcf19,
- 0x0000047e,
- 0x0040010e,
- 0x000ef61d,
-/* 0x04be: ih_no_fifo */
- 0x004004bd,
- 0x000af601,
- 0xf0fc04bd,
- 0xd0fce0fc,
- 0xa0fcb0fc,
- 0x80fc90fc,
- 0xfc0088fe,
- 0x0032f480,
-/* 0x04de: hub_barrier_done */
- 0x010f01f8,
- 0xbb040e98,
- 0xffb204fe,
- 0x4094188e,
- 0x00008f7e,
-/* 0x04f2: ctx_redswitch */
- 0x200f00f8,
+ 0x01e5f050,
+ 0x34bd24bd,
+/* 0x0386: init_unk_loop */
+ 0x657e44bd,
+ 0xf6b00000,
+ 0x0e0bf400,
+ 0xf2bb010f,
+ 0x054ffd04,
+/* 0x039b: init_unk_next */
+ 0xb60130b6,
+ 0xe0b60120,
+ 0x0126b004,
+/* 0x03a7: init_unk_done */
+ 0xb5e21bf4,
+ 0x04b50703,
+ 0x01008208,
+ 0x0022cf02,
+ 0x259534bd,
+ 0xc0008008,
+ 0x0005f601,
+ 0x008004bd,
+ 0x05f601c1,
+ 0x9804bd00,
+ 0x0f98000e,
+ 0x01207e01,
+ 0x002fbb00,
+ 0x98003fbb,
+ 0x0f98010e,
+ 0x01207e02,
+ 0x050e9800,
+ 0xbb00effd,
+ 0x3ebb002e,
+ 0x020e9800,
+ 0x7e030f98,
+ 0x98000120,
+ 0xeffd070e,
+ 0x002ebb00,
+ 0xb6003ebb,
+ 0x00800235,
+ 0x03f601d3,
+ 0xb604bd00,
+ 0x35b60825,
+ 0x0120b606,
+ 0xb60130b6,
+ 0x34b60824,
+ 0x7e2fb208,
+ 0xbb000268,
+ 0x3fbb002f,
+ 0x01008000,
+ 0x0003f602,
+ 0x24bd04bd,
+ 0x801f29f0,
+ 0xf6023000,
+ 0x04bd0002,
+/* 0x0448: main */
+ 0xf40031f4,
+ 0x240d0028,
+ 0x0000377e,
+ 0xb0f401f4,
+ 0x18f404e4,
+ 0x0181fe1d,
+ 0x20bd0602,
+ 0xb60412fd,
+ 0x1efd01e4,
+ 0x0018fe05,
+ 0x00051b7e,
+/* 0x0477: main_not_ctx_xfer */
+ 0x94d40ef4,
+ 0xf5f010ef,
+ 0x02f87e01,
+ 0xc70ef400,
+/* 0x0484: ih */
+ 0x88fe80f9,
+ 0xf980f901,
+ 0xf9a0f990,
+ 0xf9d0f9b0,
+ 0xbdf0f9e0,
+ 0x02004a04,
+ 0xc400aacf,
+ 0x0bf404ab,
+ 0x4e240d1f,
+ 0xeecf1a00,
+ 0x19004f00,
+ 0x7e00ffcf,
+ 0x0e000004,
+ 0x1d004001,
+ 0xbd000ef6,
+/* 0x04c1: ih_no_fifo */
+ 0x01004004,
+ 0xbd000af6,
+ 0xfcf0fc04,
+ 0xfcd0fce0,
+ 0xfca0fcb0,
+ 0xfe80fc90,
+ 0x80fc0088,
+ 0xf80032f4,
+/* 0x04e1: hub_barrier_done */
+ 0x98010f01,
+ 0xfebb040e,
+ 0x8effb204,
+ 0x7e409418,
+ 0xf800008f,
+/* 0x04f5: ctx_redswitch */
+ 0x80200f00,
+ 0xf6018500,
+ 0x04bd000f,
+/* 0x0502: ctx_redswitch_delay */
+ 0xe2b6080e,
+ 0xfd1bf401,
+ 0x0800f5f1,
+ 0x0200f5f1,
0x01850080,
0xbd000ff6,
-/* 0x04ff: ctx_redswitch_delay */
- 0xb6080e04,
- 0x1bf401e2,
- 0x00f5f1fd,
- 0x00f5f108,
- 0x85008002,
- 0x000ff601,
- 0x00f804bd,
-/* 0x0518: ctx_xfer */
- 0x02810080,
- 0xbd000ff6,
- 0x0711f404,
- 0x0004f27e,
-/* 0x0528: ctx_xfer_not_load */
- 0x0002167e,
- 0xfc8024bd,
- 0x02f60247,
- 0xf004bd00,
- 0x20b6012c,
- 0x4afc8003,
+/* 0x051b: ctx_xfer */
+ 0x8000f804,
+ 0xf6028100,
+ 0x04bd000f,
+ 0x7e0711f4,
+/* 0x052b: ctx_xfer_not_load */
+ 0x7e0004f5,
+ 0xbd000216,
+ 0x47fc8024,
0x0002f602,
- 0xacf004bd,
- 0x02a5f001,
- 0x5000008b,
- 0xb6040c98,
- 0xbcbb0fc4,
- 0x000c9800,
- 0x0e010d98,
- 0x013d7e00,
- 0x01acf000,
- 0x5040008b,
- 0xb6040c98,
- 0xbcbb0fc4,
- 0x010c9800,
- 0x98020d98,
- 0x004e060f,
- 0x013d7e08,
- 0x01acf000,
- 0x8b04a5f0,
- 0x98503000,
+ 0x2cf004bd,
+ 0x0320b601,
+ 0x024afc80,
+ 0xbd0002f6,
+ 0x01acf004,
+ 0x8b02a5f0,
+ 0x98500000,
0xc4b6040c,
0x00bcbb0f,
- 0x98020c98,
- 0x0f98030d,
- 0x02004e08,
+ 0x98000c98,
+ 0x000e010d,
0x00013d7e,
- 0x00020a7e,
- 0xf40601f4,
-/* 0x05b2: ctx_xfer_post */
- 0x277e0712,
-/* 0x05b6: ctx_xfer_done */
- 0xde7e0002,
- 0x00f80004,
- 0x00000000,
+ 0x8b01acf0,
+ 0x98504000,
+ 0xc4b6040c,
+ 0x00bcbb0f,
+ 0x98010c98,
+ 0x0f98020d,
+ 0x08004e06,
+ 0x00013d7e,
+ 0xf001acf0,
+ 0x008b04a5,
+ 0x0c985030,
+ 0x0fc4b604,
+ 0x9800bcbb,
+ 0x0d98020c,
+ 0x080f9803,
+ 0x7e02004e,
+ 0x7e00013d,
+ 0xf400020a,
+ 0x12f40601,
+/* 0x05b5: ctx_xfer_post */
+ 0x02277e07,
+/* 0x05b9: ctx_xfer_done */
+ 0x04e17e00,
+ 0x0000f800,
0x00000000,
0x00000000,
0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h
index 51f5c3c6e966..11bf363a6ae9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h
@@ -289,7 +289,7 @@ uint32_t gm107_grgpc_code[] = {
0x020014fe,
0x12004002,
0xbd0002f6,
- 0x05b04104,
+ 0x05b34104,
0x400010fe,
0x00f60700,
0x0204bd00,
@@ -308,259 +308,259 @@ uint32_t gm107_grgpc_code[] = {
0xc900800f,
0x0002f601,
0x308e04bd,
- 0x24bd500c,
- 0x44bd34bd,
-/* 0x03b0: init_unk_loop */
- 0x0000657e,
- 0xf400f6b0,
- 0x010f0e0b,
- 0xfd04f2bb,
- 0x30b6054f,
-/* 0x03c5: init_unk_next */
- 0x0120b601,
- 0xb004e0b6,
- 0x1bf40226,
-/* 0x03d1: init_unk_done */
- 0x0703b5e2,
- 0x820804b5,
- 0xcf020100,
- 0x34bd0022,
- 0x80082595,
- 0xf601c000,
+ 0xe5f0500c,
+ 0xbd24bd01,
+/* 0x03b3: init_unk_loop */
+ 0x7e44bd34,
+ 0xb0000065,
+ 0x0bf400f6,
+ 0xbb010f0e,
+ 0x4ffd04f2,
+ 0x0130b605,
+/* 0x03c8: init_unk_next */
+ 0xb60120b6,
+ 0x26b004e0,
+ 0xe21bf402,
+/* 0x03d4: init_unk_done */
+ 0xb50703b5,
+ 0x00820804,
+ 0x22cf0201,
+ 0x9534bd00,
+ 0x00800825,
+ 0x05f601c0,
+ 0x8004bd00,
+ 0xf601c100,
0x04bd0005,
- 0x01c10080,
- 0xbd0005f6,
- 0x000e9804,
- 0x7e010f98,
- 0xbb000120,
- 0x3fbb002f,
- 0x010e9800,
- 0x7e020f98,
- 0x98000120,
- 0xeffd050e,
- 0x002ebb00,
- 0x98003ebb,
- 0x0f98020e,
- 0x01207e03,
- 0x070e9800,
- 0xbb00effd,
- 0x3ebb002e,
- 0x0235b600,
- 0x01d30080,
- 0xbd0003f6,
- 0x0825b604,
- 0xb60635b6,
- 0x30b60120,
- 0x0824b601,
- 0xb20834b6,
- 0x02687e2f,
- 0x002fbb00,
- 0x0f003fbb,
- 0x8effb23f,
- 0xf0501d60,
- 0x8f7e01e5,
- 0x0c0f0000,
- 0xa88effb2,
- 0xe5f0501d,
- 0x008f7e01,
- 0x03147e00,
- 0xb23f0f00,
- 0x1d608eff,
- 0x01e5f050,
- 0x00008f7e,
- 0xffb2000f,
- 0x501d9c8e,
- 0x7e01e5f0,
- 0x0f00008f,
- 0x03147e01,
- 0x8effb200,
+ 0x98000e98,
+ 0x207e010f,
+ 0x2fbb0001,
+ 0x003fbb00,
+ 0x98010e98,
+ 0x207e020f,
+ 0x0e980001,
+ 0x00effd05,
+ 0xbb002ebb,
+ 0x0e98003e,
+ 0x030f9802,
+ 0x0001207e,
+ 0xfd070e98,
+ 0x2ebb00ef,
+ 0x003ebb00,
+ 0x800235b6,
+ 0xf601d300,
+ 0x04bd0003,
+ 0xb60825b6,
+ 0x20b60635,
+ 0x0130b601,
+ 0xb60824b6,
+ 0x2fb20834,
+ 0x0002687e,
+ 0xbb002fbb,
+ 0x3f0f003f,
+ 0x501d608e,
+ 0xb201e5f0,
+ 0x008f7eff,
+ 0x8e0c0f00,
0xf0501da8,
- 0x8f7e01e5,
- 0xff0f0000,
- 0x988effb2,
+ 0xffb201e5,
+ 0x00008f7e,
+ 0x0003147e,
+ 0x608e3f0f,
0xe5f0501d,
- 0x008f7e01,
- 0xb2020f00,
- 0x1da88eff,
+ 0x7effb201,
+ 0x0f00008f,
+ 0x1d9c8e00,
0x01e5f050,
- 0x00008f7e,
+ 0x8f7effb2,
+ 0x010f0000,
0x0003147e,
- 0x85050498,
- 0x98504000,
- 0x64b60406,
- 0x0056bb0f,
-/* 0x04e0: tpc_strand_init_tpc_loop */
- 0x05705eb8,
- 0x00657e00,
- 0xbdf6b200,
-/* 0x04ed: tpc_strand_init_idx_loop */
- 0x605eb874,
- 0x7fb20005,
- 0x00008f7e,
- 0x05885eb8,
- 0x082f9500,
- 0x00008f7e,
- 0x058c5eb8,
- 0x082f9500,
+ 0x501da88e,
+ 0xb201e5f0,
+ 0x008f7eff,
+ 0x8eff0f00,
+ 0xf0501d98,
+ 0xffb201e5,
0x00008f7e,
- 0x05905eb8,
- 0x00657e00,
- 0x06f5b600,
- 0xb601f0b6,
- 0x2fbb08f4,
- 0x003fbb00,
- 0xb60170b6,
- 0x1bf40162,
- 0x0050b7bf,
- 0x0142b608,
- 0x0fa81bf4,
- 0x8effb23f,
- 0xf0501d60,
- 0x8f7e01e5,
- 0x0d0f0000,
- 0xa88effb2,
+ 0xa88e020f,
0xe5f0501d,
- 0x008f7e01,
- 0x03147e00,
- 0x01008000,
- 0x0003f602,
- 0x24bd04bd,
- 0x801f29f0,
- 0xf6023000,
- 0x04bd0002,
-/* 0x0574: main */
- 0xf40031f4,
- 0x240d0028,
- 0x0000377e,
- 0xb0f401f4,
- 0x18f404e4,
- 0x0181fe1d,
- 0x20bd0602,
- 0xb60412fd,
- 0x1efd01e4,
- 0x0018fe05,
- 0x0006477e,
-/* 0x05a3: main_not_ctx_xfer */
- 0x94d40ef4,
- 0xf5f010ef,
- 0x02f87e01,
- 0xc70ef400,
-/* 0x05b0: ih */
- 0x88fe80f9,
- 0xf980f901,
- 0xf9a0f990,
- 0xf9d0f9b0,
- 0xbdf0f9e0,
- 0x02004a04,
- 0xc400aacf,
- 0x0bf404ab,
- 0x4e240d1f,
- 0xeecf1a00,
- 0x19004f00,
- 0x7e00ffcf,
- 0x0e000004,
- 0x1d004001,
- 0xbd000ef6,
-/* 0x05ed: ih_no_fifo */
- 0x01004004,
- 0xbd000af6,
- 0xfcf0fc04,
- 0xfcd0fce0,
- 0xfca0fcb0,
- 0xfe80fc90,
- 0x80fc0088,
- 0xf80032f4,
-/* 0x060d: hub_barrier_done */
- 0x98010f01,
- 0xfebb040e,
- 0x8effb204,
- 0x7e409418,
- 0xf800008f,
-/* 0x0621: ctx_redswitch */
- 0x80200f00,
+ 0x7effb201,
+ 0x7e00008f,
+ 0x98000314,
+ 0x00850504,
+ 0x06985040,
+ 0x0f64b604,
+/* 0x04e3: tpc_strand_init_tpc_loop */
+ 0xb80056bb,
+ 0x0005705e,
+ 0x0000657e,
+ 0x74bdf6b2,
+/* 0x04f0: tpc_strand_init_idx_loop */
+ 0x05605eb8,
+ 0x7e7fb200,
+ 0xb800008f,
+ 0x0005885e,
+ 0x7e082f95,
+ 0xb800008f,
+ 0x00058c5e,
+ 0x7e082f95,
+ 0xb800008f,
+ 0x0005905e,
+ 0x0000657e,
+ 0xb606f5b6,
+ 0xf4b601f0,
+ 0x002fbb08,
+ 0xb6003fbb,
+ 0x62b60170,
+ 0xbf1bf401,
+ 0x080050b7,
+ 0xf40142b6,
+ 0x3f0fa81b,
+ 0x501d608e,
+ 0xb201e5f0,
+ 0x008f7eff,
+ 0x8e0d0f00,
+ 0xf0501da8,
+ 0xffb201e5,
+ 0x00008f7e,
+ 0x0003147e,
+ 0x02010080,
+ 0xbd0003f6,
+ 0xf024bd04,
+ 0x00801f29,
+ 0x02f60230,
+/* 0x0577: main */
+ 0xf404bd00,
+ 0x28f40031,
+ 0x7e240d00,
+ 0xf4000037,
+ 0xe4b0f401,
+ 0x1d18f404,
+ 0x020181fe,
+ 0xfd20bd06,
+ 0xe4b60412,
+ 0x051efd01,
+ 0x7e0018fe,
+ 0xf400064a,
+/* 0x05a6: main_not_ctx_xfer */
+ 0xef94d40e,
+ 0x01f5f010,
+ 0x0002f87e,
+/* 0x05b3: ih */
+ 0xf9c70ef4,
+ 0x0188fe80,
+ 0x90f980f9,
+ 0xb0f9a0f9,
+ 0xe0f9d0f9,
+ 0x04bdf0f9,
+ 0xcf02004a,
+ 0xabc400aa,
+ 0x1f0bf404,
+ 0x004e240d,
+ 0x00eecf1a,
+ 0xcf19004f,
+ 0x047e00ff,
+ 0x010e0000,
+ 0xf61d0040,
+ 0x04bd000e,
+/* 0x05f0: ih_no_fifo */
+ 0xf6010040,
+ 0x04bd000a,
+ 0xe0fcf0fc,
+ 0xb0fcd0fc,
+ 0x90fca0fc,
+ 0x88fe80fc,
+ 0xf480fc00,
+ 0x01f80032,
+/* 0x0610: hub_barrier_done */
+ 0x0e98010f,
+ 0x04febb04,
+ 0x188effb2,
+ 0x8f7e4094,
+ 0x00f80000,
+/* 0x0624: ctx_redswitch */
+ 0x0080200f,
+ 0x0ff60185,
+ 0x0e04bd00,
+/* 0x0631: ctx_redswitch_delay */
+ 0x01e2b608,
+ 0xf1fd1bf4,
+ 0xf10800f5,
+ 0x800200f5,
0xf6018500,
0x04bd000f,
-/* 0x062e: ctx_redswitch_delay */
- 0xe2b6080e,
- 0xfd1bf401,
- 0x0800f5f1,
- 0x0200f5f1,
- 0x01850080,
- 0xbd000ff6,
-/* 0x0647: ctx_xfer */
- 0x8000f804,
- 0xf6028100,
- 0x04bd000f,
- 0xc48effb2,
- 0xe5f0501d,
- 0x008f7e01,
- 0x0711f400,
- 0x0006217e,
-/* 0x0664: ctx_xfer_not_load */
- 0x0002167e,
- 0xfc8024bd,
- 0x02f60247,
- 0xf004bd00,
- 0x20b6012c,
- 0x4afc8003,
+/* 0x064a: ctx_xfer */
+ 0x008000f8,
+ 0x0ff60281,
+ 0x8e04bd00,
+ 0xf0501dc4,
+ 0xffb201e5,
+ 0x00008f7e,
+ 0x7e0711f4,
+/* 0x0667: ctx_xfer_not_load */
+ 0x7e000624,
+ 0xbd000216,
+ 0x47fc8024,
0x0002f602,
- 0x0c0f04bd,
- 0xa88effb2,
- 0xe5f0501d,
- 0x008f7e01,
- 0x03147e00,
- 0xb23f0f00,
- 0x1d608eff,
- 0x01e5f050,
+ 0x2cf004bd,
+ 0x0320b601,
+ 0x024afc80,
+ 0xbd0002f6,
+ 0x8e0c0f04,
+ 0xf0501da8,
+ 0xffb201e5,
0x00008f7e,
- 0xffb2000f,
- 0x501d9c8e,
- 0x7e01e5f0,
+ 0x0003147e,
+ 0x608e3f0f,
+ 0xe5f0501d,
+ 0x7effb201,
0x0f00008f,
- 0x03147e01,
- 0x01fcf000,
- 0xb203f0b6,
- 0x1da88eff,
+ 0x1d9c8e00,
0x01e5f050,
- 0x00008f7e,
- 0xf001acf0,
- 0x008b02a5,
- 0x0c985000,
- 0x0fc4b604,
- 0x9800bcbb,
- 0x0d98000c,
- 0x7e000e01,
- 0xf000013d,
- 0x008b01ac,
- 0x0c985040,
- 0x0fc4b604,
- 0x9800bcbb,
- 0x0d98010c,
- 0x060f9802,
- 0x7e08004e,
- 0xf000013d,
+ 0x8f7effb2,
+ 0x010f0000,
+ 0x0003147e,
+ 0xb601fcf0,
+ 0xa88e03f0,
+ 0xe5f0501d,
+ 0x7effb201,
+ 0xf000008f,
0xa5f001ac,
- 0x30008b04,
+ 0x00008b02,
0x040c9850,
0xbb0fc4b6,
0x0c9800bc,
- 0x030d9802,
- 0x4e080f98,
- 0x3d7e0200,
- 0x0a7e0001,
- 0x147e0002,
- 0x01f40003,
- 0x1a12f406,
-/* 0x073c: ctx_xfer_post */
- 0x0002277e,
- 0xffb20d0f,
- 0x501da88e,
- 0x7e01e5f0,
- 0x7e00008f,
-/* 0x0753: ctx_xfer_done */
- 0x7e000314,
- 0xf800060d,
- 0x00000000,
+ 0x010d9800,
+ 0x3d7e000e,
+ 0xacf00001,
+ 0x40008b01,
+ 0x040c9850,
+ 0xbb0fc4b6,
+ 0x0c9800bc,
+ 0x020d9801,
+ 0x4e060f98,
+ 0x3d7e0800,
+ 0xacf00001,
+ 0x04a5f001,
+ 0x5030008b,
+ 0xb6040c98,
+ 0xbcbb0fc4,
+ 0x020c9800,
+ 0x98030d98,
+ 0x004e080f,
+ 0x013d7e02,
+ 0x020a7e00,
+ 0x03147e00,
+ 0x0601f400,
+/* 0x073f: ctx_xfer_post */
+ 0x7e1a12f4,
+ 0x0f000227,
+ 0x1da88e0d,
+ 0x01e5f050,
+ 0x8f7effb2,
+ 0x147e0000,
+/* 0x0756: ctx_xfer_done */
+ 0x107e0003,
+ 0x00f80006,
0x00000000,
0x00000000,
0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index dda7a7d224c9..9f5dfc85147a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -143,7 +143,7 @@ gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format,
static int
gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size)
{
- struct gf100_gr *gr = (void *)object->engine;
+ struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
union {
struct fermi_a_zbc_color_v0 v0;
} *args = data;
@@ -189,7 +189,7 @@ gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size)
static int
gf100_fermi_mthd_zbc_depth(struct nvkm_object *object, void *data, u32 size)
{
- struct gf100_gr *gr = (void *)object->engine;
+ struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
union {
struct fermi_a_zbc_depth_v0 v0;
} *args = data;
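Both helpers in the replacement are checked container_of() downcasts rather than the blind (void *) cast they replace; sketched from the pattern the nvkm headers use (see gf100.h and the gr private header for the real definitions):

#define nvkm_gr(p)  container_of((p), struct nvkm_gr, engine)
#define gf100_gr(p) container_of((p), struct gf100_gr, base)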
@@ -1530,6 +1530,8 @@ gf100_gr_oneinit(struct nvkm_gr *base)
gr->ppc_nr[i] = gr->func->ppc_nr;
for (j = 0; j < gr->ppc_nr[i]; j++) {
u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
+ if (mask)
+ gr->ppc_mask[i] |= (1 << j);
gr->ppc_tpc_nr[i][j] = hweight8(mask);
}
}
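hweight8() counts set bits, so ppc_tpc_nr is how many TPCs sit behind a PPC, while the new ppc_mask records which PPC units exist at all; the ctxgf117.c hunk earlier skips absent units against that mask. A worked example with an assumed register value:

u8 mask = 0xb0;                 /* assume the register read back 1011 0000 */
/* hweight8(0xb0) == 3 TPCs behind this PPC; mask != 0, so bit j is set
 * in ppc_mask[i] and the unit counts as present */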
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index 4611961b1187..02e78b8d93f6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -97,6 +97,7 @@ struct gf100_gr {
u8 tpc_nr[GPC_MAX];
u8 tpc_total;
u8 ppc_nr[GPC_MAX];
+ u8 ppc_mask[GPC_MAX];
u8 ppc_tpc_nr[GPC_MAX][4];
struct nvkm_memory *unk4188b4;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
index ffa902ece872..05a895496fc6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
@@ -156,6 +156,7 @@ nv40_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
return -ENOMEM;
nvkm_object_ctor(&nv40_gr_chan, oclass, &chan->object);
chan->gr = gr;
+ chan->fifo = fifoch;
*pobject = &chan->object;
spin_lock_irqsave(&chan->gr->base.engine.lock, flags);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/fan.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/fan.c
index 43006db6fd58..80fed7e78dcb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/fan.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/fan.c
@@ -83,6 +83,7 @@ nvbios_fan_parse(struct nvkm_bios *bios, struct nvbios_therm_fan *fan)
fan->type = NVBIOS_THERM_FAN_UNK;
}
+ fan->fan_mode = NVBIOS_THERM_FAN_LINEAR;
fan->min_duty = nvbios_rd08(bios, data + 0x02);
fan->max_duty = nvbios_rd08(bios, data + 0x03);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c
index 895ba74057d4..1d7dd38292b3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c
@@ -97,7 +97,9 @@ static void *
nvkm_instobj_dtor(struct nvkm_memory *memory)
{
struct nvkm_instobj *iobj = nvkm_instobj(memory);
+ spin_lock(&iobj->imem->lock);
list_del(&iobj->head);
+ spin_unlock(&iobj->imem->lock);
nvkm_memory_del(&iobj->parent);
return iobj;
}
@@ -190,7 +192,9 @@ nvkm_instobj_new(struct nvkm_instmem *imem, u32 size, u32 align, bool zero,
nvkm_memory_ctor(&nvkm_instobj_func_slow, &iobj->memory);
iobj->parent = memory;
iobj->imem = imem;
+ spin_lock(&iobj->imem->lock);
list_add_tail(&iobj->head, &imem->list);
+ spin_unlock(&iobj->imem->lock);
memory = &iobj->memory;
}
@@ -309,5 +313,6 @@ nvkm_instmem_ctor(const struct nvkm_instmem_func *func,
{
nvkm_subdev_ctor(&nvkm_instmem, device, index, 0, &imem->subdev);
imem->func = func;
+ spin_lock_init(&imem->lock);
INIT_LIST_HEAD(&imem->list);
}
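The new lock closes a race between nvkm_instobj_new() and nvkm_instobj_dtor() mutating imem->list from different threads; the rule the patch establishes, in one place:

/* every add, delete or walk of imem->list must hold imem->lock */
spin_lock(&imem->lock);
list_add_tail(&iobj->head, &imem->list);  /* or list_del(&iobj->head) */
spin_unlock(&imem->lock);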
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c
index d942fa7b9f18..86f9f3b13f71 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c
@@ -81,9 +81,7 @@ gk104_pmu_pgob(struct nvkm_pmu *pmu, bool enable)
nvkm_mask(device, 0x000200, 0x00001000, 0x00001000);
nvkm_rd32(device, 0x000200);
- if ( nvkm_boolopt(device->cfgopt, "War00C800_0",
- device->quirk ? device->quirk->War00C800_0 : false)) {
- nvkm_info(&pmu->subdev, "hw bug workaround enabled\n");
+ if (nvkm_boolopt(device->cfgopt, "War00C800_0", true)) {
switch (device->chipset) {
case 0xe4:
magic(device, 0x04000000);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c
index b61509e26ec9..b735173a18ff 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c
@@ -59,7 +59,7 @@ gk104_volt_set(struct nvkm_volt *base, u32 uv)
duty = (uv - bios->base) * div / bios->pwm_range;
nvkm_wr32(device, 0x20340, div);
- nvkm_wr32(device, 0x20344, 0x8000000 | duty);
+ nvkm_wr32(device, 0x20344, 0x80000000 | duty);
return 0;
}
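Why the extra zero matters: the old constant set bit 27, not the bit the duty write needs (bit 31, judging by this fix):

/* 0x08000000 == 1u << 27 -- what the old code wrote
 * 0x80000000 == 1u << 31 -- the intended enable bit, per this fix */
nvkm_wr32(device, 0x20344, 0x80000000 | duty);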
diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.c b/drivers/gpu/drm/omapdrm/omap_fbdev.c
index b8e4cdec28c3..24f92bea39c7 100644
--- a/drivers/gpu/drm/omapdrm/omap_fbdev.c
+++ b/drivers/gpu/drm/omapdrm/omap_fbdev.c
@@ -112,11 +112,8 @@ static int omap_fbdev_create(struct drm_fb_helper *helper,
dma_addr_t paddr;
int ret;
- /* only doing ARGB32 since this is what is needed to alpha-blend
- * with video overlays:
- */
sizes->surface_bpp = 32;
- sizes->surface_depth = 32;
+ sizes->surface_depth = 24;
DBG("create fbdev: %dx%d@%d (%dx%d)", sizes->surface_width,
sizes->surface_height, sizes->surface_bpp,
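Decoded via the legacy depth/bpp convention (drm_mode_legacy_fb_format()): 32 bpp with depth 24 selects XRGB8888, while the old 32/32 pairing selected ARGB8888, so the fbdev surface drops its (unused) alpha channel:

sizes->surface_bpp   = 32;
sizes->surface_depth = 24;      /* 32bpp + 24depth => XRGB8888, not ARGB8888 */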
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 248953d2fdb7..f81fb2641097 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -4173,11 +4173,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
control |= ib->length_dw | (vm_id << 24);
radeon_ring_write(ring, header);
- radeon_ring_write(ring,
-#ifdef __BIG_ENDIAN
- (2 << 0) |
-#endif
- (ib->gpu_addr & 0xFFFFFFFC));
+ radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
radeon_ring_write(ring, control);
}
@@ -8472,7 +8468,7 @@ restart_ih:
if (queue_dp)
schedule_work(&rdev->dp_work);
if (queue_hotplug)
- schedule_work(&rdev->hotplug_work);
+ schedule_delayed_work(&rdev->hotplug_work, 0);
if (queue_reset) {
rdev->needs_reset = true;
wake_up_all(&rdev->fence_queue);
@@ -9630,6 +9626,9 @@ static void dce8_program_watermarks(struct radeon_device *rdev,
(rdev->disp_priority == 2)) {
DRM_DEBUG_KMS("force priority to high\n");
}
+
+ /* Save number of lines the linebuffer leads before the scanout */
+ radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
}
/* select wm A */
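A quick worked example of the saved lead count, with assumed numbers: an 8192-pixel line buffer ahead of a 1920-pixel-wide scanout leads by five lines:

/* DIV_ROUND_UP(8192, 1920) == (8192 + 1919) / 1920 == 5 scanlines */
radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(8192, 1920);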
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 7f33767d7ed6..2ad462896896 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2372,6 +2372,9 @@ static void evergreen_program_watermarks(struct radeon_device *rdev,
c.full = dfixed_div(c, a);
priority_b_mark = dfixed_trunc(c);
priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
+
+ /* Save number of lines the linebuffer leads before the scanout */
+ radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
}
/* select wm A */
@@ -5344,7 +5347,7 @@ restart_ih:
if (queue_dp)
schedule_work(&rdev->dp_work);
if (queue_hotplug)
- schedule_work(&rdev->hotplug_work);
+ schedule_delayed_work(&rdev->hotplug_work, 0);
if (queue_hdmi)
schedule_work(&rdev->audio_work);
if (queue_thermal && rdev->pm.dpm_enabled)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 238b13f045c1..9e7e2bf03b81 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -806,7 +806,7 @@ int r100_irq_process(struct radeon_device *rdev)
status = r100_irq_ack(rdev);
}
if (queue_hotplug)
- schedule_work(&rdev->hotplug_work);
+ schedule_delayed_work(&rdev->hotplug_work, 0);
if (rdev->msi_enabled) {
switch (rdev->family) {
case CHIP_RS400:
@@ -3217,6 +3217,9 @@ void r100_bandwidth_update(struct radeon_device *rdev)
uint32_t pixel_bytes1 = 0;
uint32_t pixel_bytes2 = 0;
+ /* Guess line buffer size to be 8192 pixels */
+ u32 lb_size = 8192;
+
if (!rdev->mode_info.mode_config_initialized)
return;
@@ -3631,6 +3634,13 @@ void r100_bandwidth_update(struct radeon_device *rdev)
DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n",
(unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
}
+
+ /* Save number of lines the linebuffer leads before the scanout */
+ if (mode1)
+ rdev->mode_info.crtcs[0]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode1->crtc_hdisplay);
+
+ if (mode2)
+ rdev->mode_info.crtcs[1]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode2->crtc_hdisplay);
}
int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 4ea5b10ff5f4..cc2fdf0be37a 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -4276,7 +4276,7 @@ restart_ih:
WREG32(IH_RB_RPTR, rptr);
}
if (queue_hotplug)
- schedule_work(&rdev->hotplug_work);
+ schedule_delayed_work(&rdev->hotplug_work, 0);
if (queue_hdmi)
schedule_work(&rdev->audio_work);
if (queue_thermal && rdev->pm.dpm_enabled)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b6cbd816537e..87db64983ea8 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2414,7 +2414,7 @@ struct radeon_device {
struct r600_ih ih; /* r6/700 interrupt ring */
struct radeon_rlc rlc;
struct radeon_mec mec;
- struct work_struct hotplug_work;
+ struct delayed_work hotplug_work;
struct work_struct dp_work;
struct work_struct audio_work;
int num_crtc; /* number of crtcs */
diff --git a/drivers/gpu/drm/radeon/radeon_agp.c b/drivers/gpu/drm/radeon/radeon_agp.c
index fe994aac3b04..c77d349c561c 100644
--- a/drivers/gpu/drm/radeon/radeon_agp.c
+++ b/drivers/gpu/drm/radeon/radeon_agp.c
@@ -54,6 +54,9 @@ static struct radeon_agpmode_quirk radeon_agpmode_quirk_list[] = {
/* Intel 82855PM host bridge / Mobility 9600 M10 RV350 Needs AGPMode 1 (lp #195051) */
{ PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x4e50,
PCI_VENDOR_ID_IBM, 0x0550, 1},
+ /* Intel 82855PM host bridge / RV250/M9 GL [Mobility FireGL 9000/Radeon 9000] needs AGPMode 1 (Thinkpad T40p) */
+ { PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x4c66,
+ PCI_VENDOR_ID_IBM, 0x054d, 1},
/* Intel 82855PM host bridge / Mobility M7 needs AGPMode 1 */
{ PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x4c57,
PCI_VENDOR_ID_IBM, 0x0530, 1},
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 5a2cafb4f1bc..340f3f549f29 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -1234,13 +1234,32 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
if (r < 0)
return connector_status_disconnected;
+ if (radeon_connector->detected_hpd_without_ddc) {
+ force = true;
+ radeon_connector->detected_hpd_without_ddc = false;
+ }
+
if (!force && radeon_check_hpd_status_unchanged(connector)) {
ret = connector->status;
goto exit;
}
- if (radeon_connector->ddc_bus)
+ if (radeon_connector->ddc_bus) {
dret = radeon_ddc_probe(radeon_connector, false);
+
+ /* Sometimes the pins required for the DDC probe on DVI
+ * connectors don't make contact at the same time that the ones
+ * for HPD do. If the DDC probe fails even though we had an HPD
+ * signal, try again later */
+ if (!dret && !force &&
+ connector->status != connector_status_connected) {
+ DRM_DEBUG_KMS("hpd detected without ddc, retrying in 1 second\n");
+ radeon_connector->detected_hpd_without_ddc = true;
+ schedule_delayed_work(&rdev->hotplug_work,
+ msecs_to_jiffies(1000));
+ goto exit;
+ }
+ }
if (dret) {
radeon_connector->detected_by_load = false;
radeon_connector_free_edid(connector);
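The retry machinery above, reduced to its two halves (fields as in the patch): the detect callback records the miss and defers to the hotplug worker instead of blocking, and the flag forces one full re-detect when the worker re-enters detect:

/* in detect(), on HPD without DDC: remember it and re-poll later */
radeon_connector->detected_hpd_without_ddc = true;
schedule_delayed_work(&rdev->hotplug_work, msecs_to_jiffies(1000));

/* on re-entry, consume the flag and force a full probe once */
if (radeon_connector->detected_hpd_without_ddc) {
        force = true;
        radeon_connector->detected_hpd_without_ddc = false;
}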
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index a8d9927ed9eb..1eca0acac016 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -322,7 +322,9 @@ void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id)
* to complete in this vblank?
*/
if (update_pending &&
- (DRM_SCANOUTPOS_VALID & radeon_get_crtc_scanoutpos(rdev->ddev, crtc_id, 0,
+ (DRM_SCANOUTPOS_VALID & radeon_get_crtc_scanoutpos(rdev->ddev,
+ crtc_id,
+ USE_REAL_VBLANKSTART,
&vpos, &hpos, NULL, NULL,
&rdev->mode_info.crtcs[crtc_id]->base.hwmode)) &&
((vpos >= (99 * rdev->mode_info.crtcs[crtc_id]->base.hwmode.crtc_vdisplay)/100) ||
@@ -401,6 +403,8 @@ static void radeon_flip_work_func(struct work_struct *__work)
struct drm_crtc *crtc = &radeon_crtc->base;
unsigned long flags;
int r;
+ int vpos, hpos, stat, min_udelay;
+ struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
down_read(&rdev->exclusive_lock);
if (work->fence) {
@@ -437,6 +441,41 @@ static void radeon_flip_work_func(struct work_struct *__work)
/* set the proper interrupt */
radeon_irq_kms_pflip_irq_get(rdev, radeon_crtc->crtc_id);
+ /* If this happens to execute within the "virtually extended" vblank
+ * interval before the start of the real vblank interval then it needs
+ * to delay programming the mmio flip until the real vblank is entered.
+ * This prevents completing a flip too early due to the way we fudge
+ * our vblank counter and vblank timestamps in order to work around the
+ * problem that the hw fires vblank interrupts before actual start of
+ * vblank (when line buffer refilling is done for a frame). It
+ * complements the fudging logic in radeon_get_crtc_scanoutpos() for
+ * timestamping and radeon_get_vblank_counter_kms() for vblank counts.
+ *
+ * In practice this won't execute very often unless on very fast
+ * machines because the time window for this to happen is very small.
+ */
+ for (;;) {
+ /* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
+ * start in hpos, and to the "fudged earlier" vblank start in
+ * vpos.
+ */
+ stat = radeon_get_crtc_scanoutpos(rdev->ddev, work->crtc_id,
+ GET_DISTANCE_TO_VBLANKSTART,
+ &vpos, &hpos, NULL, NULL,
+ &crtc->hwmode);
+
+ if ((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
+ (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE) ||
+ !(vpos >= 0 && hpos <= 0))
+ break;
+
+ /* Sleep at least until estimated real start of hw vblank */
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+ min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
+ usleep_range(min_udelay, 2 * min_udelay);
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+	}
+
/* do the flip (mmio) */
radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base);
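
The sleep bound in the loop above is simple arithmetic: hpos arrives as the negative distance in scanlines to the real vblank start, and linedur_ns is one scanline's duration. A standalone sketch with assumed 1080p60 numbers:

    #include <stdio.h>

    int main(void)
    {
        int hpos = -3;           /* e.g. 3 scanlines before real vblank start */
        int linedur_ns = 14815;  /* ~14.8 us/line for 1080p60; an assumption */
        int line_us = linedur_ns / 1000;

        if (line_us < 5)
            line_us = 5;         /* the max(linedur_ns / 1000, 5) clamp */

        /* (-hpos + 1) scanlines of margin, expressed in microseconds */
        int min_udelay = (-hpos + 1) * line_us;
        printf("usleep_range(%d, %d)\n", min_udelay, 2 * min_udelay);
        return 0;
    }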
@@ -1768,6 +1807,15 @@ bool radeon_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
* \param dev Device to query.
* \param crtc Crtc to query.
* \param flags Flags from caller (DRM_CALLED_FROM_VBLIRQ or 0).
+ * For driver internal use only, it also supports these flags:
+ *
+ * USE_REAL_VBLANKSTART to use the real start of vblank instead
+ * of a fudged earlier start of vblank.
+ *
+ * GET_DISTANCE_TO_VBLANKSTART to return distance to the
+ * fudged earlier start of vblank in *vpos and the distance
+ * to true start of vblank in *hpos.
+ *
* \param *vpos Location where vertical scanout position should be stored.
* \param *hpos Location where horizontal scanout position should go.
* \param *stime Target location for timestamp taken immediately before
@@ -1911,10 +1959,40 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
vbl_end = 0;
}
+ /* Called from driver internal vblank counter query code? */
+ if (flags & GET_DISTANCE_TO_VBLANKSTART) {
+ /* Caller wants distance from real vbl_start in *hpos */
+ *hpos = *vpos - vbl_start;
+ }
+
+ /* Fudge vblank to start a few scanlines earlier to handle the
+ * problem that vblank irqs fire a few scanlines before start
+ * of vblank. Some driver internal callers need the true vblank
+ * start to be used and signal this via the USE_REAL_VBLANKSTART flag.
+ *
+ * The cause of the "early" vblank irq is that the irq is triggered
+ * by the line buffer logic when the line buffer read position enters
+ * the vblank, whereas our crtc scanout position naturally lags the
+ * line buffer read position.
+ */
+ if (!(flags & USE_REAL_VBLANKSTART))
+ vbl_start -= rdev->mode_info.crtcs[pipe]->lb_vblank_lead_lines;
+
/* Test scanout position against vblank region. */
if ((*vpos < vbl_start) && (*vpos >= vbl_end))
in_vbl = false;
+ /* In vblank? */
+ if (in_vbl)
+ ret |= DRM_SCANOUTPOS_IN_VBLANK;
+
+ /* Called from driver internal vblank counter query code? */
+ if (flags & GET_DISTANCE_TO_VBLANKSTART) {
+ /* Caller wants distance from fudged earlier vbl_start */
+ *vpos -= vbl_start;
+ return ret;
+ }
+
/* Check if inside vblank area and apply corrective offsets:
* vpos will then be >=0 in video scanout area, but negative
* within vblank area, counting down the number of lines until
@@ -1930,31 +2008,5 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
/* Correct for shifted end of vbl at vbl_end. */
*vpos = *vpos - vbl_end;
- /* In vblank? */
- if (in_vbl)
- ret |= DRM_SCANOUTPOS_IN_VBLANK;
-
- /* Is vpos outside nominal vblank area, but less than
- * 1/100 of a frame height away from start of vblank?
- * If so, assume this isn't a massively delayed vblank
- * interrupt, but a vblank interrupt that fired a few
- * microseconds before true start of vblank. Compensate
- * by adding a full frame duration to the final timestamp.
- * Happens, e.g., on ATI R500, R600.
- *
- * We only do this if DRM_CALLED_FROM_VBLIRQ.
- */
- if ((flags & DRM_CALLED_FROM_VBLIRQ) && !in_vbl) {
- vbl_start = mode->crtc_vdisplay;
- vtotal = mode->crtc_vtotal;
-
- if (vbl_start - *vpos < vtotal / 100) {
- *vpos -= vtotal;
-
- /* Signal this correction as "applied". */
- ret |= 0x8;
- }
- }
-
return ret;
}
diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c
index 171d3e43c30c..979f3bf65f2c 100644
--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c
@@ -74,7 +74,7 @@ irqreturn_t radeon_driver_irq_handler_kms(int irq, void *arg)
static void radeon_hotplug_work_func(struct work_struct *work)
{
struct radeon_device *rdev = container_of(work, struct radeon_device,
- hotplug_work);
+ hotplug_work.work);
struct drm_device *dev = rdev->ddev;
struct drm_mode_config *mode_config = &dev->mode_config;
struct drm_connector *connector;
@@ -302,7 +302,7 @@ int radeon_irq_kms_init(struct radeon_device *rdev)
}
}
- INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func);
+ INIT_DELAYED_WORK(&rdev->hotplug_work, radeon_hotplug_work_func);
INIT_WORK(&rdev->dp_work, radeon_dp_work_func);
INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi);
@@ -310,7 +310,7 @@ int radeon_irq_kms_init(struct radeon_device *rdev)
r = drm_irq_install(rdev->ddev, rdev->ddev->pdev->irq);
if (r) {
rdev->irq.installed = false;
- flush_work(&rdev->hotplug_work);
+ flush_delayed_work(&rdev->hotplug_work);
return r;
}
@@ -333,7 +333,7 @@ void radeon_irq_kms_fini(struct radeon_device *rdev)
rdev->irq.installed = false;
if (rdev->msi_enabled)
pci_disable_msi(rdev->pdev);
- flush_work(&rdev->hotplug_work);
+ flush_delayed_work(&rdev->hotplug_work);
}
}
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 0ec6fcca16d3..d290a8a09036 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -755,6 +755,8 @@ void radeon_driver_preclose_kms(struct drm_device *dev,
*/
u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc)
{
+ int vpos, hpos, stat;
+ u32 count;
struct radeon_device *rdev = dev->dev_private;
if (crtc < 0 || crtc >= rdev->num_crtc) {
@@ -762,7 +764,53 @@ u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc)
return -EINVAL;
}
- return radeon_get_vblank_counter(rdev, crtc);
+ /* The hw increments its frame counter at start of vsync, not at start
+ * of vblank, as is required by DRM core vblank counter handling.
+ * Cook the hw count here to make it appear to the caller as if it
+ * incremented at start of vblank. We measure distance to start of
+ * vblank in vpos. vpos therefore will be >= 0 between start of vblank
+ * and start of vsync, so vpos >= 0 means to bump the hw frame counter
+ * result by 1 to give the proper appearance to caller.
+ */
+ if (rdev->mode_info.crtcs[crtc]) {
+ /* Repeat readout if needed to provide stable result if
+ * we cross start of vsync during the queries.
+ */
+ do {
+ count = radeon_get_vblank_counter(rdev, crtc);
+ /* Ask radeon_get_crtc_scanoutpos to return vpos as
+ * distance to start of vblank, instead of regular
+ * vertical scanout pos.
+ */
+ stat = radeon_get_crtc_scanoutpos(
+ dev, crtc, GET_DISTANCE_TO_VBLANKSTART,
+ &vpos, &hpos, NULL, NULL,
+ &rdev->mode_info.crtcs[crtc]->base.hwmode);
+ } while (count != radeon_get_vblank_counter(rdev, crtc));
+
+ if (((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
+ (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE))) {
+ DRM_DEBUG_VBL("Query failed! stat %d\n", stat);
+ }
+ else {
+ DRM_DEBUG_VBL("crtc %d: dist from vblank start %d\n",
+ crtc, vpos);
+
+ /* Bump counter if we are at >= leading edge of vblank,
+ * but before vsync where vpos would turn negative and
+ * the hw counter really increments.
+ */
+ if (vpos >= 0)
+ count++;
+ }
+ }
+ else {
+ /* Fallback to use value as is. */
+ count = radeon_get_vblank_counter(rdev, crtc);
+ DRM_DEBUG_VBL("NULL mode info! Returned count may be wrong.\n");
+ }
+
+ return count;
}
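
The do/while above is the usual stabilization idiom for paired reads: sample the counter, sample the scanout position, and retry if the counter changed in between, so both values describe the same frame. Reduced to its skeleton (reader callbacks stand in for the register accessors):

    /* Sketch: repeat the paired reads until the counter is stable across them. */
    static unsigned int read_stable_count(unsigned int (*get_count)(void),
                                          void (*get_dist)(int *vpos))
    {
        unsigned int count;
        int vpos;

        do {
            count = get_count();
            get_dist(&vpos);        /* distance to vblank start */
        } while (count != get_count());

        /* hw increments at vsync; DRM wants the bump at vblank start */
        if (vpos >= 0)
            count++;
        return count;
    }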
/**
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 830e171c3a9e..bba112628b47 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -367,6 +367,7 @@ struct radeon_crtc {
u32 line_time;
u32 wm_low;
u32 wm_high;
+ u32 lb_vblank_lead_lines;
struct drm_display_mode hw_mode;
enum radeon_output_csc output_csc;
};
@@ -553,6 +554,7 @@ struct radeon_connector {
void *con_priv;
bool dac_load_detect;
bool detected_by_load; /* if the connection status was determined by load */
+ bool detected_hpd_without_ddc; /* if an HPD signal was detected on DVI, but ddc probing failed */
uint16_t connector_object_id;
struct radeon_hpd hpd;
struct radeon_router router;
@@ -686,6 +688,9 @@ struct atom_voltage_table
struct atom_voltage_table_entry entries[MAX_VOLTAGE_ENTRIES];
};
+/* Driver internal use only flags of radeon_get_crtc_scanoutpos() */
+#define USE_REAL_VBLANKSTART (1 << 30)
+#define GET_DISTANCE_TO_VBLANKSTART (1 << 31)
extern void
radeon_add_atom_connector(struct drm_device *dev,
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index d3024883b844..84d45633d28c 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -221,11 +221,17 @@ int radeon_bo_create(struct radeon_device *rdev,
if (!(rdev->flags & RADEON_IS_PCIE))
bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
+ /* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx
+ * See https://bugs.freedesktop.org/show_bug.cgi?id=91268
+ */
+ if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635)
+ bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
+
#ifdef CONFIG_X86_32
/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
* See https://bugs.freedesktop.org/show_bug.cgi?id=84627
*/
- bo->flags &= ~RADEON_GEM_GTT_WC;
+ bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
/* Don't try to enable write-combining when it can't work, or things
* may be slow
@@ -235,9 +241,10 @@ int radeon_bo_create(struct radeon_device *rdev,
#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
thanks to write-combining
- DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
- "better performance thanks to write-combining\n");
- bo->flags &= ~RADEON_GEM_GTT_WC;
+ if (bo->flags & RADEON_GEM_GTT_WC)
+ DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
+ "better performance thanks to write-combining\n");
+ bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#endif
radeon_ttm_placement_from_domain(bo, domain);
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 6d80dde23400..59abebd6b5dc 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -1542,8 +1542,7 @@ int radeon_pm_late_init(struct radeon_device *rdev)
ret = device_create_file(rdev->dev, &dev_attr_power_method);
if (ret)
DRM_ERROR("failed to create device file for power method\n");
- if (!ret)
- rdev->pm.sysfs_initialized = true;
+ rdev->pm.sysfs_initialized = true;
}
mutex_lock(&rdev->pm.mutex);
@@ -1757,7 +1756,9 @@ static bool radeon_pm_in_vbl(struct radeon_device *rdev)
*/
for (crtc = 0; (crtc < rdev->num_crtc) && in_vbl; crtc++) {
if (rdev->pm.active_crtcs & (1 << crtc)) {
- vbl_status = radeon_get_crtc_scanoutpos(rdev->ddev, crtc, 0,
+ vbl_status = radeon_get_crtc_scanoutpos(rdev->ddev,
+ crtc,
+ USE_REAL_VBLANKSTART,
&vpos, &hpos, NULL, NULL,
&rdev->mode_info.crtcs[crtc]->base.hwmode);
if ((vbl_status & DRM_SCANOUTPOS_VALID) &&
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 574f62bbd215..7eb1ae758906 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -361,31 +361,31 @@ int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring,
/* stitch together a VCE create msg */
ib.length_dw = 0;
- ib.ptr[ib.length_dw++] = 0x0000000c; /* len */
- ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */
- ib.ptr[ib.length_dw++] = handle;
-
- ib.ptr[ib.length_dw++] = 0x00000030; /* len */
- ib.ptr[ib.length_dw++] = 0x01000001; /* create cmd */
- ib.ptr[ib.length_dw++] = 0x00000000;
- ib.ptr[ib.length_dw++] = 0x00000042;
- ib.ptr[ib.length_dw++] = 0x0000000a;
- ib.ptr[ib.length_dw++] = 0x00000001;
- ib.ptr[ib.length_dw++] = 0x00000080;
- ib.ptr[ib.length_dw++] = 0x00000060;
- ib.ptr[ib.length_dw++] = 0x00000100;
- ib.ptr[ib.length_dw++] = 0x00000100;
- ib.ptr[ib.length_dw++] = 0x0000000c;
- ib.ptr[ib.length_dw++] = 0x00000000;
-
- ib.ptr[ib.length_dw++] = 0x00000014; /* len */
- ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */
- ib.ptr[ib.length_dw++] = upper_32_bits(dummy);
- ib.ptr[ib.length_dw++] = dummy;
- ib.ptr[ib.length_dw++] = 0x00000001;
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000c); /* len */
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001); /* session cmd */
+ ib.ptr[ib.length_dw++] = cpu_to_le32(handle);
+
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000030); /* len */
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x01000001); /* create cmd */
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000000);
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000042);
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000a);
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001);
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000080);
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000060);
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000100);
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000100);
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000c);
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000000);
+
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000014); /* len */
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x05000005); /* feedback buffer */
+ ib.ptr[ib.length_dw++] = cpu_to_le32(upper_32_bits(dummy));
+ ib.ptr[ib.length_dw++] = cpu_to_le32(dummy);
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001);
for (i = ib.length_dw; i < ib_size_dw; ++i)
- ib.ptr[i] = 0x0;
+ ib.ptr[i] = cpu_to_le32(0x0);
r = radeon_ib_schedule(rdev, &ib, NULL, false);
if (r) {
@@ -428,21 +428,21 @@ int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring,
/* stitch together a VCE destroy msg */
ib.length_dw = 0;
- ib.ptr[ib.length_dw++] = 0x0000000c; /* len */
- ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */
- ib.ptr[ib.length_dw++] = handle;
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000c); /* len */
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001); /* session cmd */
+ ib.ptr[ib.length_dw++] = cpu_to_le32(handle);
- ib.ptr[ib.length_dw++] = 0x00000014; /* len */
- ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */
- ib.ptr[ib.length_dw++] = upper_32_bits(dummy);
- ib.ptr[ib.length_dw++] = dummy;
- ib.ptr[ib.length_dw++] = 0x00000001;
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000014); /* len */
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x05000005); /* feedback buffer */
+ ib.ptr[ib.length_dw++] = cpu_to_le32(upper_32_bits(dummy));
+ ib.ptr[ib.length_dw++] = cpu_to_le32(dummy);
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001);
- ib.ptr[ib.length_dw++] = 0x00000008; /* len */
- ib.ptr[ib.length_dw++] = 0x02000001; /* destroy cmd */
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000008); /* len */
+ ib.ptr[ib.length_dw++] = cpu_to_le32(0x02000001); /* destroy cmd */
for (i = ib.length_dw; i < ib_size_dw; ++i)
- ib.ptr[i] = 0x0;
+ ib.ptr[i] = cpu_to_le32(0x0);
r = radeon_ib_schedule(rdev, &ib, NULL, false);
if (r) {
@@ -699,12 +699,12 @@ bool radeon_vce_semaphore_emit(struct radeon_device *rdev,
{
uint64_t addr = semaphore->gpu_addr;
- radeon_ring_write(ring, VCE_CMD_SEMAPHORE);
- radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
- radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
- radeon_ring_write(ring, 0x01003000 | (emit_wait ? 1 : 0));
+ radeon_ring_write(ring, cpu_to_le32(VCE_CMD_SEMAPHORE));
+ radeon_ring_write(ring, cpu_to_le32((addr >> 3) & 0x000FFFFF));
+ radeon_ring_write(ring, cpu_to_le32((addr >> 23) & 0x000FFFFF));
+ radeon_ring_write(ring, cpu_to_le32(0x01003000 | (emit_wait ? 1 : 0)));
if (!emit_wait)
- radeon_ring_write(ring, VCE_CMD_END);
+ radeon_ring_write(ring, cpu_to_le32(VCE_CMD_END));
return true;
}
@@ -719,10 +719,10 @@ bool radeon_vce_semaphore_emit(struct radeon_device *rdev,
void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
- radeon_ring_write(ring, VCE_CMD_IB);
- radeon_ring_write(ring, ib->gpu_addr);
- radeon_ring_write(ring, upper_32_bits(ib->gpu_addr));
- radeon_ring_write(ring, ib->length_dw);
+ radeon_ring_write(ring, cpu_to_le32(VCE_CMD_IB));
+ radeon_ring_write(ring, cpu_to_le32(ib->gpu_addr));
+ radeon_ring_write(ring, cpu_to_le32(upper_32_bits(ib->gpu_addr)));
+ radeon_ring_write(ring, cpu_to_le32(ib->length_dw));
}
/**
@@ -738,12 +738,12 @@ void radeon_vce_fence_emit(struct radeon_device *rdev,
struct radeon_ring *ring = &rdev->ring[fence->ring];
uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr;
- radeon_ring_write(ring, VCE_CMD_FENCE);
- radeon_ring_write(ring, addr);
- radeon_ring_write(ring, upper_32_bits(addr));
- radeon_ring_write(ring, fence->seq);
- radeon_ring_write(ring, VCE_CMD_TRAP);
- radeon_ring_write(ring, VCE_CMD_END);
+ radeon_ring_write(ring, cpu_to_le32(VCE_CMD_FENCE));
+ radeon_ring_write(ring, cpu_to_le32(addr));
+ radeon_ring_write(ring, cpu_to_le32(upper_32_bits(addr)));
+ radeon_ring_write(ring, cpu_to_le32(fence->seq));
+ radeon_ring_write(ring, cpu_to_le32(VCE_CMD_TRAP));
+ radeon_ring_write(ring, cpu_to_le32(VCE_CMD_END));
}
/**
@@ -765,7 +765,7 @@ int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
ring->idx, r);
return r;
}
- radeon_ring_write(ring, VCE_CMD_END);
+ radeon_ring_write(ring, cpu_to_le32(VCE_CMD_END));
radeon_ring_unlock_commit(rdev, ring, false);
for (i = 0; i < rdev->usec_timeout; i++) {
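
The cpu_to_le32() churn throughout this file exists because the VCE block consumes its command words as little-endian regardless of host byte order: the helper is a no-op on little-endian CPUs and a byte swap on big-endian ones. A userspace sketch of the equivalent swap (the real helper comes from asm/byteorder.h):

    #include <stdint.h>
    #include <stdio.h>

    /* What cpu_to_le32() does on a big-endian host; a no-op on little-endian. */
    static uint32_t swap32(uint32_t v)
    {
        return (v >> 24) | ((v >> 8) & 0x0000ff00) |
               ((v << 8) & 0x00ff0000) | (v << 24);
    }

    int main(void)
    {
        /* the 0x0000000c "len" word from the IB above, as a BE host stores it */
        printf("0x%08x -> 0x%08x\n", 0x0000000cu, swap32(0x0000000cu));
        return 0;
    }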
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 97a904835759..6244f4e44e9a 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -813,7 +813,7 @@ int rs600_irq_process(struct radeon_device *rdev)
status = rs600_irq_ack(rdev);
}
if (queue_hotplug)
- schedule_work(&rdev->hotplug_work);
+ schedule_delayed_work(&rdev->hotplug_work, 0);
if (queue_hdmi)
schedule_work(&rdev->audio_work);
if (rdev->msi_enabled) {
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index 516ca27cfa12..6bc44c24e837 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -207,6 +207,9 @@ void rs690_line_buffer_adjust(struct radeon_device *rdev,
{
u32 tmp;
+ /* Guess line buffer size to be 8192 pixels */
+ u32 lb_size = 8192;
+
/*
* Line Buffer Setup
* There is a single line buffer shared by both display controllers.
@@ -243,6 +246,13 @@ void rs690_line_buffer_adjust(struct radeon_device *rdev,
tmp |= V_006520_DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q;
}
WREG32(R_006520_DC_LB_MEMORY_SPLIT, tmp);
+
+ /* Save number of lines the linebuffer leads before the scanout */
+ if (mode1)
+ rdev->mode_info.crtcs[0]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode1->crtc_hdisplay);
+
+ if (mode2)
+ rdev->mode_info.crtcs[1]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode2->crtc_hdisplay);
}
struct rs690_watermark {
diff --git a/drivers/gpu/drm/radeon/rv730_dpm.c b/drivers/gpu/drm/radeon/rv730_dpm.c
index 3f5e1cf138ba..d37ba2cb886e 100644
--- a/drivers/gpu/drm/radeon/rv730_dpm.c
+++ b/drivers/gpu/drm/radeon/rv730_dpm.c
@@ -464,7 +464,7 @@ void rv730_stop_dpm(struct radeon_device *rdev)
result = rv770_send_msg_to_smc(rdev, PPSMC_MSG_TwoLevelsDisabled);
if (result != PPSMC_Result_OK)
- DRM_ERROR("Could not force DPM to low\n");
+ DRM_DEBUG("Could not force DPM to low\n");
WREG32_P(GENERAL_PWRMGT, 0, ~GLOBAL_PWRMGT_EN);
diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c
index b9c770745a7a..e830c8935db0 100644
--- a/drivers/gpu/drm/radeon/rv770_dpm.c
+++ b/drivers/gpu/drm/radeon/rv770_dpm.c
@@ -193,7 +193,7 @@ void rv770_stop_dpm(struct radeon_device *rdev)
result = rv770_send_msg_to_smc(rdev, PPSMC_MSG_TwoLevelsDisabled);
if (result != PPSMC_Result_OK)
- DRM_ERROR("Could not force DPM to low.\n");
+ DRM_DEBUG("Could not force DPM to low.\n");
WREG32_P(GENERAL_PWRMGT, 0, ~GLOBAL_PWRMGT_EN);
@@ -1418,7 +1418,7 @@ int rv770_resume_smc(struct radeon_device *rdev)
int rv770_set_sw_state(struct radeon_device *rdev)
{
if (rv770_send_msg_to_smc(rdev, PPSMC_MSG_SwitchToSwState) != PPSMC_Result_OK)
- return -EINVAL;
+ DRM_DEBUG("rv770_set_sw_state failed\n");
return 0;
}
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 07037e32dea3..f878d6962da5 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -2376,6 +2376,9 @@ static void dce6_program_watermarks(struct radeon_device *rdev,
c.full = dfixed_div(c, a);
priority_b_mark = dfixed_trunc(c);
priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
+
+ /* Save number of lines the linebuffer leads before the scanout */
+ radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
}
/* select wm A */
@@ -6848,7 +6851,7 @@ restart_ih:
if (queue_dp)
schedule_work(&rdev->dp_work);
if (queue_hotplug)
- schedule_work(&rdev->hotplug_work);
+ schedule_delayed_work(&rdev->hotplug_work, 0);
if (queue_thermal && rdev->pm.dpm_enabled)
schedule_work(&rdev->pm.dpm.thermal.work);
rdev->ih.rptr = rptr;
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index e72bf46042e0..a82b891ae1fe 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2927,7 +2927,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = {
{ PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 },
{ PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0xe271, 0, 120000 },
{ PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 },
- { PCI_VENDOR_ID_ATI, 0x6811, 0x1762, 0x2015, 0, 120000 },
+ { PCI_VENDOR_ID_ATI, 0x6811, 0x1462, 0x2015, 0, 120000 },
{ PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 },
{ 0, 0, 0, 0 },
};
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
index 8caea0a33dd8..d908321b94ce 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
@@ -67,6 +67,7 @@ static int rockchip_drm_gem_object_mmap(struct drm_gem_object *obj,
* VM_PFNMAP flag that was set by drm_gem_mmap_obj()/drm_gem_mmap().
*/
vma->vm_flags &= ~VM_PFNMAP;
+ vma->vm_pgoff = 0;
ret = dma_mmap_attrs(drm->dev, vma, rk_obj->kvaddr, rk_obj->dma_addr,
obj->size, &rk_obj->dma_attrs);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 5d8ae5e49c44..03c47eeadc81 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -374,6 +374,7 @@ static const struct of_device_id vop_driver_dt_match[] = {
.data = &rk3288_vop },
{},
};
+MODULE_DEVICE_TABLE(of, vop_driver_dt_match);
static inline void vop_writel(struct vop *vop, uint32_t offset, uint32_t v)
{
@@ -959,8 +960,8 @@ static int vop_update_plane_event(struct drm_plane *plane,
val = (dest.y2 - dest.y1 - 1) << 16;
val |= (dest.x2 - dest.x1 - 1) & 0xffff;
VOP_WIN_SET(vop, win, dsp_info, val);
- val = (dsp_sty - 1) << 16;
- val |= (dsp_stx - 1) & 0xffff;
+ val = dsp_sty << 16;
+ val |= dsp_stx & 0xffff;
VOP_WIN_SET(vop, win, dsp_st, val);
VOP_WIN_SET(vop, win, rb_swap, rb_swap);
@@ -1289,7 +1290,7 @@ static void vop_win_state_complete(struct vop_win *vop_win,
if (state->event) {
spin_lock_irqsave(&drm->event_lock, flags);
- drm_send_vblank_event(drm, -1, state->event);
+ drm_crtc_send_vblank_event(crtc, state->event);
spin_unlock_irqrestore(&drm->event_lock, flags);
}
@@ -1575,32 +1576,25 @@ static int vop_initial(struct vop *vop)
return PTR_ERR(vop->dclk);
}
- ret = clk_prepare(vop->hclk);
- if (ret < 0) {
- dev_err(vop->dev, "failed to prepare hclk\n");
- return ret;
- }
-
ret = clk_prepare(vop->dclk);
if (ret < 0) {
dev_err(vop->dev, "failed to prepare dclk\n");
- goto err_unprepare_hclk;
+ return ret;
}
- ret = clk_prepare(vop->aclk);
+	/* Enable both the hclk and aclk to set up the vop */
+ ret = clk_prepare_enable(vop->hclk);
if (ret < 0) {
- dev_err(vop->dev, "failed to prepare aclk\n");
+ dev_err(vop->dev, "failed to prepare/enable hclk\n");
goto err_unprepare_dclk;
}
- /*
- * enable hclk, so that we can config vop register.
- */
- ret = clk_enable(vop->hclk);
+ ret = clk_prepare_enable(vop->aclk);
if (ret < 0) {
- dev_err(vop->dev, "failed to prepare aclk\n");
- goto err_unprepare_aclk;
+ dev_err(vop->dev, "failed to prepare/enable aclk\n");
+ goto err_disable_hclk;
}
+
/*
* do hclk_reset, reset all vop registers.
*/
@@ -1608,7 +1602,7 @@ static int vop_initial(struct vop *vop)
if (IS_ERR(ahb_rst)) {
dev_err(vop->dev, "failed to get ahb reset\n");
ret = PTR_ERR(ahb_rst);
- goto err_disable_hclk;
+ goto err_disable_aclk;
}
reset_control_assert(ahb_rst);
usleep_range(10, 20);
@@ -1634,26 +1628,25 @@ static int vop_initial(struct vop *vop)
if (IS_ERR(vop->dclk_rst)) {
dev_err(vop->dev, "failed to get dclk reset\n");
ret = PTR_ERR(vop->dclk_rst);
- goto err_unprepare_aclk;
+ goto err_disable_aclk;
}
reset_control_assert(vop->dclk_rst);
usleep_range(10, 20);
reset_control_deassert(vop->dclk_rst);
clk_disable(vop->hclk);
+ clk_disable(vop->aclk);
vop->is_enabled = false;
return 0;
+err_disable_aclk:
+ clk_disable_unprepare(vop->aclk);
err_disable_hclk:
- clk_disable(vop->hclk);
-err_unprepare_aclk:
- clk_unprepare(vop->aclk);
+ clk_disable_unprepare(vop->hclk);
err_unprepare_dclk:
clk_unprepare(vop->dclk);
-err_unprepare_hclk:
- clk_unprepare(vop->hclk);
return ret;
}
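
The relabelled error paths follow the standard kernel unwind idiom: each acquired resource gets one label that releases it, and failures jump to the label matching how far setup got. The skeleton of the new ordering (a sketch with the same three clocks, detail omitted):

    #include <linux/clk.h>

    static int my_initial(struct clk *dclk, struct clk *hclk, struct clk *aclk)
    {
        int ret;

        ret = clk_prepare(dclk);              /* prepared only */
        if (ret < 0)
            return ret;

        ret = clk_prepare_enable(hclk);       /* prepared and enabled */
        if (ret < 0)
            goto err_unprepare_dclk;

        ret = clk_prepare_enable(aclk);
        if (ret < 0)
            goto err_disable_hclk;

        return 0;

    err_disable_hclk:
        clk_disable_unprepare(hclk);
    err_unprepare_dclk:
        clk_unprepare(dclk);
        return ret;
    }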
diff --git a/drivers/gpu/drm/ttm/ttm_lock.c b/drivers/gpu/drm/ttm/ttm_lock.c
index 6a954544727f..f154fb1929bd 100644
--- a/drivers/gpu/drm/ttm/ttm_lock.c
+++ b/drivers/gpu/drm/ttm/ttm_lock.c
@@ -180,7 +180,7 @@ int ttm_write_lock(struct ttm_lock *lock, bool interruptible)
spin_unlock(&lock->lock);
}
} else
- wait_event(lock->queue, __ttm_read_lock(lock));
+ wait_event(lock->queue, __ttm_write_lock(lock));
return ret;
}
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 7a9f4768591e..265064c62d49 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -168,7 +168,7 @@ static int vc4_get_clock_select(struct drm_crtc *crtc)
struct drm_connector *connector;
drm_for_each_connector(connector, crtc->dev) {
- if (connector && connector->state->crtc == crtc) {
+ if (connector->state->crtc == crtc) {
struct drm_encoder *encoder = connector->encoder;
struct vc4_encoder *vc4_encoder =
to_vc4_encoder(encoder);
@@ -401,7 +401,8 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc,
dlist_next++;
HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
- (u32 *)vc4_crtc->dlist - (u32 *)vc4->hvs->dlist);
+ (u32 __iomem *)vc4_crtc->dlist -
+ (u32 __iomem *)vc4->hvs->dlist);
/* Make the next display list start after ours. */
vc4_crtc->dlist_size -= (dlist_next - vc4_crtc->dlist);
@@ -591,14 +592,14 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
* that will take too much.
*/
primary_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_PRIMARY);
- if (!primary_plane) {
+ if (IS_ERR(primary_plane)) {
dev_err(dev, "failed to construct primary plane\n");
ret = PTR_ERR(primary_plane);
goto err;
}
cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
- if (!cursor_plane) {
+ if (IS_ERR(cursor_plane)) {
dev_err(dev, "failed to construct cursor plane\n");
ret = PTR_ERR(cursor_plane);
goto err_primary;
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 6e730605edcc..d5db9e0f3b73 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -259,7 +259,6 @@ static struct platform_driver vc4_platform_driver = {
.remove = vc4_platform_drm_remove,
.driver = {
.name = "vc4-drm",
- .owner = THIS_MODULE,
.of_match_table = vc4_of_match,
},
};
diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c
index ab1673f672a4..8098c5b21ba4 100644
--- a/drivers/gpu/drm/vc4/vc4_hvs.c
+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
@@ -75,10 +75,10 @@ void vc4_hvs_dump_state(struct drm_device *dev)
for (i = 0; i < 64; i += 4) {
DRM_INFO("0x%08x (%s): 0x%08x 0x%08x 0x%08x 0x%08x\n",
i * 4, i < HVS_BOOTLOADER_DLIST_END ? "B" : "D",
- ((uint32_t *)vc4->hvs->dlist)[i + 0],
- ((uint32_t *)vc4->hvs->dlist)[i + 1],
- ((uint32_t *)vc4->hvs->dlist)[i + 2],
- ((uint32_t *)vc4->hvs->dlist)[i + 3]);
+ readl((u32 __iomem *)vc4->hvs->dlist + i + 0),
+ readl((u32 __iomem *)vc4->hvs->dlist + i + 1),
+ readl((u32 __iomem *)vc4->hvs->dlist + i + 2),
+ readl((u32 __iomem *)vc4->hvs->dlist + i + 3));
}
}
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index cdd8b10c0147..887f3caad0be 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -70,7 +70,7 @@ static bool plane_enabled(struct drm_plane_state *state)
return state->fb && state->crtc;
}
-struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
+static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
{
struct vc4_plane_state *vc4_state;
@@ -97,8 +97,8 @@ struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
return &vc4_state->base;
}
-void vc4_plane_destroy_state(struct drm_plane *plane,
- struct drm_plane_state *state)
+static void vc4_plane_destroy_state(struct drm_plane *plane,
+ struct drm_plane_state *state)
{
struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
@@ -108,7 +108,7 @@ void vc4_plane_destroy_state(struct drm_plane *plane,
}
/* Called during init to allocate the plane's atomic state. */
-void vc4_plane_reset(struct drm_plane *plane)
+static void vc4_plane_reset(struct drm_plane *plane)
{
struct vc4_plane_state *vc4_state;
@@ -157,6 +157,16 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
int crtc_w = state->crtc_w;
int crtc_h = state->crtc_h;
+ if (state->crtc_w << 16 != state->src_w ||
+ state->crtc_h << 16 != state->src_h) {
+ /* We don't support scaling yet, which involves
+ * allocating the LBM memory for scaling temporary
+ * storage, and putting filter kernels in the HVS
+ * context.
+ */
+ return -EINVAL;
+ }
+
if (crtc_x < 0) {
offset += drm_format_plane_cpp(fb->pixel_format, 0) * -crtc_x;
crtc_w += crtc_x;
diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c
index f545913a56c7..578fe0a9324c 100644
--- a/drivers/gpu/drm/virtio/virtgpu_display.c
+++ b/drivers/gpu/drm/virtio/virtgpu_display.c
@@ -412,7 +412,7 @@ static const struct drm_connector_funcs virtio_gpu_connector_funcs = {
.save = virtio_gpu_conn_save,
.restore = virtio_gpu_conn_restore,
.detect = virtio_gpu_conn_detect,
- .fill_modes = drm_helper_probe_single_connector_modes,
+ .fill_modes = drm_helper_probe_single_connector_modes_nomerge,
.destroy = virtio_gpu_conn_destroy,
.reset = drm_atomic_helper_connector_reset,
.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index a09cf8529b9f..c49812b80dd0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1233,6 +1233,7 @@ static void vmw_master_drop(struct drm_device *dev,
vmw_fp->locked_master = drm_master_get(file_priv->master);
ret = ttm_vt_lock(&vmaster->lock, false, vmw_fp->tfile);
+ vmw_kms_legacy_hotspot_clear(dev_priv);
if (unlikely((ret != 0))) {
DRM_ERROR("Unable to lock TTM at VT switch.\n");
drm_master_put(&vmw_fp->locked_master);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index a8ae9dfb83b7..469cdd520615 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -925,6 +925,7 @@ int vmw_kms_present(struct vmw_private *dev_priv,
uint32_t num_clips);
int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
+void vmw_kms_legacy_hotspot_clear(struct vmw_private *dev_priv);
int vmw_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index a8baf5f5e765..b6a0806b06bf 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -390,7 +390,7 @@ void *vmw_fifo_reserve_dx(struct vmw_private *dev_priv, uint32_t bytes,
else if (ctx_id == SVGA3D_INVALID_ID)
ret = vmw_local_fifo_reserve(dev_priv, bytes);
else {
- WARN_ON("Command buffer has not been allocated.\n");
+ WARN(1, "Command buffer has not been allocated.\n");
ret = NULL;
}
if (IS_ERR_OR_NULL(ret)) {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 9fcd7f82995c..9b4bb9e74d73 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -133,13 +133,19 @@ void vmw_cursor_update_position(struct vmw_private *dev_priv,
vmw_mmio_write(++count, fifo_mem + SVGA_FIFO_CURSOR_COUNT);
}
-int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
- uint32_t handle, uint32_t width, uint32_t height)
+
+/*
+ * vmw_du_crtc_cursor_set2 - Driver cursor_set2 callback.
+ */
+int vmw_du_crtc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv,
+ uint32_t handle, uint32_t width, uint32_t height,
+ int32_t hot_x, int32_t hot_y)
{
struct vmw_private *dev_priv = vmw_priv(crtc->dev);
struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
struct vmw_surface *surface = NULL;
struct vmw_dma_buffer *dmabuf = NULL;
+ s32 hotspot_x, hotspot_y;
int ret;
/*
@@ -151,6 +157,8 @@ int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
*/
drm_modeset_unlock_crtc(crtc);
drm_modeset_lock_all(dev_priv->dev);
+ hotspot_x = hot_x + du->hotspot_x;
+ hotspot_y = hot_y + du->hotspot_y;
/* A lot of the code assumes this */
if (handle && (width != 64 || height != 64)) {
@@ -187,31 +195,34 @@ int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
vmw_dmabuf_unreference(&du->cursor_dmabuf);
/* setup new image */
+ ret = 0;
if (surface) {
/* vmw_user_surface_lookup takes one reference */
du->cursor_surface = surface;
du->cursor_surface->snooper.crtc = crtc;
du->cursor_age = du->cursor_surface->snooper.age;
- vmw_cursor_update_image(dev_priv, surface->snooper.image,
- 64, 64, du->hotspot_x, du->hotspot_y);
+ ret = vmw_cursor_update_image(dev_priv, surface->snooper.image,
+ 64, 64, hotspot_x, hotspot_y);
} else if (dmabuf) {
/* vmw_user_surface_lookup takes one reference */
du->cursor_dmabuf = dmabuf;
ret = vmw_cursor_update_dmabuf(dev_priv, dmabuf, width, height,
- du->hotspot_x, du->hotspot_y);
+ hotspot_x, hotspot_y);
} else {
vmw_cursor_update_position(dev_priv, false, 0, 0);
- ret = 0;
goto out;
}
- vmw_cursor_update_position(dev_priv, true,
- du->cursor_x + du->hotspot_x,
- du->cursor_y + du->hotspot_y);
+ if (!ret) {
+ vmw_cursor_update_position(dev_priv, true,
+ du->cursor_x + hotspot_x,
+ du->cursor_y + hotspot_y);
+ du->core_hotspot_x = hot_x;
+ du->core_hotspot_y = hot_y;
+ }
- ret = 0;
out:
drm_modeset_unlock_all(dev_priv->dev);
drm_modeset_lock_crtc(crtc, crtc->cursor);
@@ -239,8 +250,10 @@ int vmw_du_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
drm_modeset_lock_all(dev_priv->dev);
vmw_cursor_update_position(dev_priv, shown,
- du->cursor_x + du->hotspot_x,
- du->cursor_y + du->hotspot_y);
+ du->cursor_x + du->hotspot_x +
+ du->core_hotspot_x,
+ du->cursor_y + du->hotspot_y +
+ du->core_hotspot_y);
drm_modeset_unlock_all(dev_priv->dev);
drm_modeset_lock_crtc(crtc, crtc->cursor);
@@ -334,6 +347,29 @@ err_unreserve:
ttm_bo_unreserve(bo);
}
+/**
+ * vmw_kms_legacy_hotspot_clear - Clear legacy hotspots
+ *
+ * @dev_priv: Pointer to the device private struct.
+ *
+ * Clears all legacy hotspots.
+ */
+void vmw_kms_legacy_hotspot_clear(struct vmw_private *dev_priv)
+{
+ struct drm_device *dev = dev_priv->dev;
+ struct vmw_display_unit *du;
+ struct drm_crtc *crtc;
+
+ drm_modeset_lock_all(dev);
+ drm_for_each_crtc(crtc, dev) {
+ du = vmw_crtc_to_du(crtc);
+
+ du->hotspot_x = 0;
+ du->hotspot_y = 0;
+ }
+ drm_modeset_unlock_all(dev);
+}
+
void vmw_kms_cursor_post_execbuf(struct vmw_private *dev_priv)
{
struct drm_device *dev = dev_priv->dev;
@@ -351,7 +387,9 @@ void vmw_kms_cursor_post_execbuf(struct vmw_private *dev_priv)
du->cursor_age = du->cursor_surface->snooper.age;
vmw_cursor_update_image(dev_priv,
du->cursor_surface->snooper.image,
- 64, 64, du->hotspot_x, du->hotspot_y);
+ 64, 64,
+ du->hotspot_x + du->core_hotspot_x,
+ du->hotspot_y + du->core_hotspot_y);
}
mutex_unlock(&dev->mode_config.mutex);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index 782df7ca9794..edd81503516d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -159,6 +159,8 @@ struct vmw_display_unit {
int hotspot_x;
int hotspot_y;
+ s32 core_hotspot_x;
+ s32 core_hotspot_y;
unsigned unit;
@@ -193,8 +195,9 @@ void vmw_du_crtc_restore(struct drm_crtc *crtc);
void vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
u16 *r, u16 *g, u16 *b,
uint32_t start, uint32_t size);
-int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
- uint32_t handle, uint32_t width, uint32_t height);
+int vmw_du_crtc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv,
+ uint32_t handle, uint32_t width, uint32_t height,
+ int32_t hot_x, int32_t hot_y);
int vmw_du_crtc_cursor_move(struct drm_crtc *crtc, int x, int y);
int vmw_du_connector_dpms(struct drm_connector *connector, int mode);
void vmw_du_connector_save(struct drm_connector *connector);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index bb63e4d795fa..52caecb4502e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -297,7 +297,7 @@ static int vmw_ldu_crtc_set_config(struct drm_mode_set *set)
static struct drm_crtc_funcs vmw_legacy_crtc_funcs = {
.save = vmw_du_crtc_save,
.restore = vmw_du_crtc_restore,
- .cursor_set = vmw_du_crtc_cursor_set,
+ .cursor_set2 = vmw_du_crtc_cursor_set2,
.cursor_move = vmw_du_crtc_cursor_move,
.gamma_set = vmw_du_crtc_gamma_set,
.destroy = vmw_ldu_crtc_destroy,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index b96d1ab610c5..13926ff192e3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -533,7 +533,7 @@ out_no_fence:
static struct drm_crtc_funcs vmw_screen_object_crtc_funcs = {
.save = vmw_du_crtc_save,
.restore = vmw_du_crtc_restore,
- .cursor_set = vmw_du_crtc_cursor_set,
+ .cursor_set2 = vmw_du_crtc_cursor_set2,
.cursor_move = vmw_du_crtc_cursor_move,
.gamma_set = vmw_du_crtc_gamma_set,
.destroy = vmw_sou_crtc_destroy,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index b1fc1c02792d..f823fc3efed7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -1043,7 +1043,7 @@ out_finish:
static struct drm_crtc_funcs vmw_stdu_crtc_funcs = {
.save = vmw_du_crtc_save,
.restore = vmw_du_crtc_restore,
- .cursor_set = vmw_du_crtc_cursor_set,
+ .cursor_set2 = vmw_du_crtc_cursor_set2,
.cursor_move = vmw_du_crtc_cursor_move,
.gamma_set = vmw_du_crtc_gamma_set,
.destroy = vmw_stdu_crtc_destroy,
diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index ba47b30d28fa..f2e13eb8339f 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -28,6 +28,7 @@
#include <linux/irqchip/chained_irq.h>
#include <linux/irqdomain.h>
#include <linux/of_device.h>
+#include <linux/of_graph.h>
#include <drm/drm_fourcc.h>
@@ -993,12 +994,26 @@ static void platform_device_unregister_children(struct platform_device *pdev)
struct ipu_platform_reg {
struct ipu_client_platformdata pdata;
const char *name;
- int reg_offset;
};
+/* These must be in the order of the corresponding device tree port nodes */
static const struct ipu_platform_reg client_reg[] = {
{
.pdata = {
+ .csi = 0,
+ .dma[0] = IPUV3_CHANNEL_CSI0,
+ .dma[1] = -EINVAL,
+ },
+ .name = "imx-ipuv3-camera",
+ }, {
+ .pdata = {
+ .csi = 1,
+ .dma[0] = IPUV3_CHANNEL_CSI1,
+ .dma[1] = -EINVAL,
+ },
+ .name = "imx-ipuv3-camera",
+ }, {
+ .pdata = {
.di = 0,
.dc = 5,
.dp = IPU_DP_FLOW_SYNC_BG,
@@ -1015,22 +1030,6 @@ static const struct ipu_platform_reg client_reg[] = {
.dma[1] = -EINVAL,
},
.name = "imx-ipuv3-crtc",
- }, {
- .pdata = {
- .csi = 0,
- .dma[0] = IPUV3_CHANNEL_CSI0,
- .dma[1] = -EINVAL,
- },
- .reg_offset = IPU_CM_CSI0_REG_OFS,
- .name = "imx-ipuv3-camera",
- }, {
- .pdata = {
- .csi = 1,
- .dma[0] = IPUV3_CHANNEL_CSI1,
- .dma[1] = -EINVAL,
- },
- .reg_offset = IPU_CM_CSI1_REG_OFS,
- .name = "imx-ipuv3-camera",
},
};
@@ -1051,22 +1050,30 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base)
for (i = 0; i < ARRAY_SIZE(client_reg); i++) {
const struct ipu_platform_reg *reg = &client_reg[i];
struct platform_device *pdev;
- struct resource res;
-
- if (reg->reg_offset) {
- memset(&res, 0, sizeof(res));
- res.flags = IORESOURCE_MEM;
- res.start = ipu_base + ipu->devtype->cm_ofs + reg->reg_offset;
- res.end = res.start + PAGE_SIZE - 1;
- pdev = platform_device_register_resndata(dev, reg->name,
- id++, &res, 1, &reg->pdata, sizeof(reg->pdata));
- } else {
- pdev = platform_device_register_data(dev, reg->name,
- id++, &reg->pdata, sizeof(reg->pdata));
+
+ pdev = platform_device_alloc(reg->name, id++);
+ if (!pdev) {
+ ret = -ENOMEM;
+ goto err_register;
+ }
+
+ pdev->dev.parent = dev;
+
+ /* Associate subdevice with the corresponding port node */
+ pdev->dev.of_node = of_graph_get_port_by_id(dev->of_node, i);
+ if (!pdev->dev.of_node) {
+ dev_err(dev, "missing port@%d node in %s\n", i,
+ dev->of_node->full_name);
+ ret = -ENODEV;
+ goto err_register;
}
- if (IS_ERR(pdev)) {
- ret = PTR_ERR(pdev);
+ ret = platform_device_add_data(pdev, &reg->pdata,
+ sizeof(reg->pdata));
+ if (!ret)
+ ret = platform_device_add(pdev);
+ if (ret) {
+ platform_device_put(pdev);
goto err_register;
}
}
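
The rework above replaces the one-shot platform_device_register_data() with the two-phase alloc/add sequence, because the port's of_node has to be attached to the device before it is registered. The shape of that sequence, condensed (a sketch; names are illustrative):

    #include <linux/of_graph.h>
    #include <linux/platform_device.h>

    static int my_register_client(struct device *parent, const char *name,
                                  int id, const void *pdata, size_t size,
                                  u32 port)
    {
        struct platform_device *pdev;
        int ret;

        pdev = platform_device_alloc(name, id);
        if (!pdev)
            return -ENOMEM;

        pdev->dev.parent = parent;
        /* must be set before platform_device_add() */
        pdev->dev.of_node = of_graph_get_port_by_id(parent->of_node, port);
        if (!pdev->dev.of_node) {
            ret = -ENODEV;
            goto err_put;
        }

        ret = platform_device_add_data(pdev, pdata, size);
        if (!ret)
            ret = platform_device_add(pdev);
        if (ret)
            goto err_put;
        return 0;

    err_put:
        platform_device_put(pdev);   /* drops the allocation on any failure */
        return ret;
    }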
diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c
index 3166e4bc4eb6..9abcaa53bd25 100644
--- a/drivers/gpu/vga/vgaarb.c
+++ b/drivers/gpu/vga/vgaarb.c
@@ -395,8 +395,10 @@ int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible)
set_current_state(interruptible ?
TASK_INTERRUPTIBLE :
TASK_UNINTERRUPTIBLE);
- if (signal_pending(current)) {
- rc = -EINTR;
+ if (interruptible && signal_pending(current)) {
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&vga_wait_queue, &wait);
+ rc = -ERESTARTSYS;
break;
}
schedule();
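
The fix restores two invariants of a hand-rolled wait loop: only an interruptible sleeper may abort on a signal, and whoever aborts must restore its task state and leave the wait queue before returning. The canonical shape, condensed with prepare_to_wait()/finish_wait(), which fold in the cleanup the raw add_wait_queue() version above must do by hand (a sketch; the condition callback is invented):

    #include <linux/sched.h>
    #include <linux/wait.h>

    static DECLARE_WAIT_QUEUE_HEAD(my_wait_queue);

    static int my_wait(bool interruptible, bool (*done)(void))
    {
        DEFINE_WAIT(wait);
        int rc = 0;

        for (;;) {
            prepare_to_wait(&my_wait_queue, &wait,
                            interruptible ? TASK_INTERRUPTIBLE
                                          : TASK_UNINTERRUPTIBLE);
            if (done())
                break;
            if (interruptible && signal_pending(current)) {
                rc = -ERESTARTSYS;   /* let the syscall be restarted */
                break;
            }
            schedule();
        }
        finish_wait(&my_wait_queue, &wait);
        return rc;
    }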
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index ac1feea51be3..8b78a7f1f779 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -316,11 +316,6 @@
#define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_A001 0xa001
#define USB_VENDOR_ID_ELAN 0x04f3
-#define USB_DEVICE_ID_ELAN_TOUCHSCREEN 0x0089
-#define USB_DEVICE_ID_ELAN_TOUCHSCREEN_009B 0x009b
-#define USB_DEVICE_ID_ELAN_TOUCHSCREEN_0103 0x0103
-#define USB_DEVICE_ID_ELAN_TOUCHSCREEN_010c 0x010c
-#define USB_DEVICE_ID_ELAN_TOUCHSCREEN_016F 0x016f
#define USB_VENDOR_ID_ELECOM 0x056e
#define USB_DEVICE_ID_ELECOM_BM084 0x0061
@@ -609,6 +604,7 @@
#define USB_DEVICE_ID_LOGITECH_HARMONY_FIRST 0xc110
#define USB_DEVICE_ID_LOGITECH_HARMONY_LAST 0xc14f
#define USB_DEVICE_ID_LOGITECH_HARMONY_PS3 0x0306
+#define USB_DEVICE_ID_LOGITECH_KEYBOARD_G710_PLUS 0xc24d
#define USB_DEVICE_ID_LOGITECH_MOUSE_C01A 0xc01a
#define USB_DEVICE_ID_LOGITECH_MOUSE_C05A 0xc05a
#define USB_DEVICE_ID_LOGITECH_MOUSE_C06A 0xc06a
diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c
index c20ac76c0a8c..c690fae02cf8 100644
--- a/drivers/hid/hid-lg.c
+++ b/drivers/hid/hid-lg.c
@@ -665,8 +665,9 @@ static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id)
struct lg_drv_data *drv_data;
int ret;
- /* Only work with the 1st interface (G29 presents multiple) */
- if (iface_num != 0) {
+	/* The G29 only works with the 1st interface */
+ if ((hdev->product == USB_DEVICE_ID_LOGITECH_G29_WHEEL) &&
+ (iface_num != 0)) {
dbg_hid("%s: ignoring ifnum %d\n", __func__, iface_num);
return -ENODEV;
}
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 94bb137abe32..7dd0953cd70f 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -72,11 +72,7 @@ static const struct hid_blacklist {
{ USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT },
- { USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN, HID_QUIRK_ALWAYS_POLL },
- { USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN_009B, HID_QUIRK_ALWAYS_POLL },
- { USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN_0103, HID_QUIRK_ALWAYS_POLL },
- { USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN_010c, HID_QUIRK_ALWAYS_POLL },
- { USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN_016F, HID_QUIRK_ALWAYS_POLL },
+ { USB_VENDOR_ID_ELAN, HID_ANY_ID, HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER, HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28, HID_QUIRK_NOGET },
@@ -84,6 +80,7 @@ static const struct hid_blacklist {
{ USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A, HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C077, HID_QUIRK_ALWAYS_POLL },
+ { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_KEYBOARD_G710_PLUS, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C01A, HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A, HID_QUIRK_ALWAYS_POLL },
{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A, HID_QUIRK_ALWAYS_POLL },
@@ -339,7 +336,8 @@ static const struct hid_blacklist *usbhid_exists_squirk(const u16 idVendor,
for (; hid_blacklist[n].idVendor; n++)
if (hid_blacklist[n].idVendor == idVendor &&
- hid_blacklist[n].idProduct == idProduct)
+ (hid_blacklist[n].idProduct == (__u16) HID_ANY_ID ||
+ hid_blacklist[n].idProduct == idProduct))
bl_entry = &hid_blacklist[n];
if (bl_entry != NULL)
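
HID_ANY_ID turns a single table row into a per-vendor wildcard, which is why the lookup above grows a second clause. The essentials, reduced to standalone C (types simplified; in the kernel HID_ANY_ID is (~0), truncated to __u16 at compare time, so a real product ID of 0xffff would also match):

    #include <stddef.h>
    #include <stdint.h>

    #define HID_ANY_ID 0xffffu

    struct quirk { uint16_t vendor; uint16_t product; uint32_t flags; };

    static const struct quirk quirks[] = {
        { 0x04f3 /* ELAN */, HID_ANY_ID, 0x400 /* HID_QUIRK_ALWAYS_POLL */ },
        { 0, 0, 0 },
    };

    static uint32_t lookup_quirks(uint16_t vendor, uint16_t product)
    {
        for (size_t n = 0; quirks[n].vendor; n++)
            if (quirks[n].vendor == vendor &&
                (quirks[n].product == HID_ANY_ID ||
                 quirks[n].product == product))
                return quirks[n].flags;
        return 0;
    }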
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 8b29949507d1..01a4f05c1642 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2481,7 +2481,7 @@ void wacom_setup_device_quirks(struct wacom *wacom)
if (features->pktlen == WACOM_PKGLEN_BBTOUCH3) {
if (features->touch_max)
features->device_type |= WACOM_DEVICETYPE_TOUCH;
- if (features->type >= INTUOSHT || features->type <= BAMBOO_PT)
+ if (features->type >= INTUOSHT && features->type <= BAMBOO_PT)
features->device_type |= WACOM_DEVICETYPE_PAD;
features->x_max = 4096;
@@ -3213,7 +3213,8 @@ static const struct wacom_features wacom_features_0x32F =
WACOM_DTU_OFFSET, WACOM_DTU_OFFSET };
static const struct wacom_features wacom_features_0x336 =
{ "Wacom DTU1141", 23472, 13203, 1023, 0,
- DTUS, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 4 };
+ DTUS, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 4,
+ WACOM_DTU_OFFSET, WACOM_DTU_OFFSET };
static const struct wacom_features wacom_features_0x57 =
{ "Wacom DTK2241", 95640, 54060, 2047, 63,
DTK, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 6,
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 842b0043ad94..80a73bfc1a65 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -324,6 +324,7 @@ config SENSORS_APPLESMC
config SENSORS_ARM_SCPI
tristate "ARM SCPI Sensors"
depends on ARM_SCPI_PROTOCOL
+ depends on THERMAL || !THERMAL_OF
help
This driver provides support for temperature, voltage, current
and power sensors available on ARM Ltd's SCP based platforms. The
@@ -1216,6 +1217,7 @@ config SENSORS_PWM_FAN
config SENSORS_SHT15
tristate "Sensiron humidity and temperature sensors. SHT15 and compat."
depends on GPIOLIB || COMPILE_TEST
+ select BITREVERSE
help
If you say yes here you get support for the Sensirion SHT10, SHT11,
SHT15, SHT71, SHT75 humidity and temperature sensors.
@@ -1471,6 +1473,7 @@ config SENSORS_INA209
config SENSORS_INA2XX
tristate "Texas Instruments INA219 and compatibles"
depends on I2C
+ select REGMAP_I2C
help
If you say yes here you get support for INA219, INA220, INA226,
INA230, and INA231 power monitor chips.
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index 1f5e956941b1..0af7fd311979 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -537,7 +537,7 @@ static int applesmc_init_index(struct applesmc_registers *s)
static int applesmc_init_smcreg_try(void)
{
struct applesmc_registers *s = &smcreg;
- bool left_light_sensor, right_light_sensor;
+	bool left_light_sensor = false, right_light_sensor = false;
unsigned int count;
u8 tmp[1];
int ret;
diff --git a/drivers/hwmon/scpi-hwmon.c b/drivers/hwmon/scpi-hwmon.c
index 2c1241bbf9af..7e20567bc369 100644
--- a/drivers/hwmon/scpi-hwmon.c
+++ b/drivers/hwmon/scpi-hwmon.c
@@ -117,7 +117,7 @@ static int scpi_hwmon_probe(struct platform_device *pdev)
struct scpi_ops *scpi_ops;
struct device *hwdev, *dev = &pdev->dev;
struct scpi_sensors *scpi_sensors;
- int ret;
+ int ret, idx;
scpi_ops = get_scpi_ops();
if (!scpi_ops)
@@ -146,8 +146,8 @@ static int scpi_hwmon_probe(struct platform_device *pdev)
scpi_sensors->scpi_ops = scpi_ops;
- for (i = 0; i < nr_sensors; i++) {
- struct sensor_data *sensor = &scpi_sensors->data[i];
+ for (i = 0, idx = 0; i < nr_sensors; i++) {
+ struct sensor_data *sensor = &scpi_sensors->data[idx];
ret = scpi_ops->sensor_get_info(i, &sensor->info);
if (ret)
@@ -183,7 +183,7 @@ static int scpi_hwmon_probe(struct platform_device *pdev)
num_power++;
break;
default:
- break;
+ continue;
}
sensor->dev_attr_input.attr.mode = S_IRUGO;
@@ -194,11 +194,12 @@ static int scpi_hwmon_probe(struct platform_device *pdev)
sensor->dev_attr_label.show = scpi_show_label;
sensor->dev_attr_label.attr.name = sensor->label;
- scpi_sensors->attrs[i << 1] = &sensor->dev_attr_input.attr;
- scpi_sensors->attrs[(i << 1) + 1] = &sensor->dev_attr_label.attr;
+ scpi_sensors->attrs[idx << 1] = &sensor->dev_attr_input.attr;
+ scpi_sensors->attrs[(idx << 1) + 1] = &sensor->dev_attr_label.attr;
- sysfs_attr_init(scpi_sensors->attrs[i << 1]);
- sysfs_attr_init(scpi_sensors->attrs[(i << 1) + 1]);
+ sysfs_attr_init(scpi_sensors->attrs[idx << 1]);
+ sysfs_attr_init(scpi_sensors->attrs[(idx << 1) + 1]);
+ idx++;
}
scpi_sensors->group.attrs = scpi_sensors->attrs;
@@ -236,8 +237,8 @@ static int scpi_hwmon_probe(struct platform_device *pdev)
zone->sensor_id = i;
zone->scpi_sensors = scpi_sensors;
- zone->tzd = thermal_zone_of_sensor_register(dev, i, zone,
- &scpi_sensor_ops);
+ zone->tzd = thermal_zone_of_sensor_register(dev,
+ sensor->info.sensor_id, zone, &scpi_sensor_ops);
/*
* The call to thermal_zone_of_sensor_register returns
* an error for sensors that are not associated with
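
The i/idx split above is the standard compaction idiom: i walks every probed sensor, while idx only advances for the types the driver exposes, keeping the attribute array dense with no holes for skipped sensors. Stripped to its core (a sketch with invented types):

    #include <stdbool.h>
    #include <stddef.h>

    struct sensor { int type; int value; };

    /* Copy only supported sensors into out[]; return how many were kept. */
    static size_t compact_sensors(const struct sensor *in, size_t n,
                                  struct sensor *out,
                                  bool (*supported)(int type))
    {
        size_t idx = 0;

        for (size_t i = 0; i < n; i++) {
            if (!supported(in[i].type))
                continue;            /* mirrors the "default: continue;" above */
            out[idx++] = in[i];
        }
        return idx;
    }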
diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c
index 65482624ea2c..5289aa0980a8 100644
--- a/drivers/hwmon/tmp102.c
+++ b/drivers/hwmon/tmp102.c
@@ -58,6 +58,7 @@ struct tmp102 {
u16 config_orig;
unsigned long last_update;
int temp[3];
+ bool first_time;
};
/* convert left adjusted 13-bit TMP102 register value to milliCelsius */
@@ -93,6 +94,7 @@ static struct tmp102 *tmp102_update_device(struct device *dev)
tmp102->temp[i] = tmp102_reg_to_mC(status);
}
tmp102->last_update = jiffies;
+ tmp102->first_time = false;
}
mutex_unlock(&tmp102->lock);
return tmp102;
@@ -102,6 +104,12 @@ static int tmp102_read_temp(void *dev, int *temp)
{
struct tmp102 *tmp102 = tmp102_update_device(dev);
+ /* Is it too early even to return a conversion? */
+ if (tmp102->first_time) {
+		dev_dbg(dev, "%s: Conversion not ready yet\n", __func__);
+ return -EAGAIN;
+ }
+
*temp = tmp102->temp[0];
return 0;
@@ -114,6 +122,10 @@ static ssize_t tmp102_show_temp(struct device *dev,
struct sensor_device_attribute *sda = to_sensor_dev_attr(attr);
struct tmp102 *tmp102 = tmp102_update_device(dev);
+ /* Is it too early even to return a read? */
+ if (tmp102->first_time)
+ return -EAGAIN;
+
return sprintf(buf, "%d\n", tmp102->temp[sda->index]);
}
@@ -207,7 +219,9 @@ static int tmp102_probe(struct i2c_client *client,
status = -ENODEV;
goto fail_restore_config;
}
- tmp102->last_update = jiffies - HZ;
+ tmp102->last_update = jiffies;
+ /* Mark that we are not ready with data until conversion is complete */
+ tmp102->first_time = true;
mutex_init(&tmp102->lock);
hwmon_dev = hwmon_device_register_with_groups(dev, client->name,
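
The first_time flag amounts to a readiness gate: between probe and the first completed conversion the cached temperature is meaningless, so readers get -EAGAIN rather than a bogus value. The gate in isolation (a sketch; the real driver refreshes the cache under a mutex on a jiffies-based schedule):

    #include <errno.h>
    #include <stdbool.h>

    struct tmp_state {
        bool first_time;   /* true from probe until the first conversion lands */
        int temp_mC;
    };

    static int read_temp(const struct tmp_state *s, int *out)
    {
        if (s->first_time)
            return -EAGAIN;   /* too early: no conversion has completed yet */
        *out = s->temp_mC;
        return 0;
    }

    static void on_conversion(struct tmp_state *s, int temp_mC)
    {
        s->temp_mC = temp_mC;
        s->first_time = false;   /* reads are valid from here on */
    }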
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index e24c2b680b47..7b0aa82ea38b 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -126,6 +126,7 @@ config I2C_I801
Sunrise Point-LP (PCH)
DNV (SOC)
Broxton (SOC)
+ Lewisburg (PCH)
This driver can also be built as a module. If so, the module
will be called i2c-i801.
diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c
index c5628a42170a..a8bdcb5292f5 100644
--- a/drivers/i2c/busses/i2c-davinci.c
+++ b/drivers/i2c/busses/i2c-davinci.c
@@ -202,8 +202,15 @@ static void i2c_davinci_calc_clk_dividers(struct davinci_i2c_dev *dev)
* d is always 6 on Keystone I2C controller
*/
- /* get minimum of 7 MHz clock, but max of 12 MHz */
- psc = (input_clock / 7000000) - 1;
+ /*
+ * Both Davinci and current Keystone User Guides recommend a value
+ * between 7MHz and 12MHz. In reality 7MHz module clock doesn't
+ * always produce enough margin between SDA and SCL transitions.
+ * Measurements show that the higher the module clock is, the
+ * bigger is the margin, providing more reliable communication.
+	 * So it is better to target 12 MHz.
+ */
+ psc = (input_clock / 12000000) - 1;
if ((input_clock / (psc + 1)) > 12000000)
psc++; /* better to run under spec than over */
d = (psc >= 2) ? 5 : 7 - psc;
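
As a worked example of the new divider math, assuming a 204 MHz functional clock (a plausible Keystone value, not taken from the patch): psc = 204000000 / 12000000 - 1 = 16, the module clock lands on 204 MHz / 17 = 12 MHz exactly, and since psc >= 2, d = 5.

    #include <stdio.h>

    int main(void)
    {
        unsigned int input_clock = 204000000;        /* assumed functional clock */
        unsigned int psc = input_clock / 12000000 - 1;

        if (input_clock / (psc + 1) > 12000000)
            psc++;                                   /* run under spec, not over */

        unsigned int d = (psc >= 2) ? 5 : 7 - psc;
        printf("psc=%u module=%uHz d=%u\n", psc, input_clock / (psc + 1), d);
        return 0;
    }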
diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c
index 8c48b27ba059..de7fbbb374cd 100644
--- a/drivers/i2c/busses/i2c-designware-core.c
+++ b/drivers/i2c/busses/i2c-designware-core.c
@@ -813,6 +813,12 @@ static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
tx_aborted:
if ((stat & (DW_IC_INTR_TX_ABRT | DW_IC_INTR_STOP_DET)) || dev->msg_err)
complete(&dev->cmd_complete);
+ else if (unlikely(dev->accessor_flags & ACCESS_INTR_MASK)) {
+ /* workaround to trigger pending interrupt */
+ stat = dw_readl(dev, DW_IC_INTR_MASK);
+ i2c_dw_disable_int(dev);
+ dw_writel(dev, stat, DW_IC_INTR_MASK);
+ }
return IRQ_HANDLED;
}
diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 1d50898e7b24..9ffb63a60f95 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -111,6 +111,7 @@ struct dw_i2c_dev {
#define ACCESS_SWAP 0x00000001
#define ACCESS_16BIT 0x00000002
+#define ACCESS_INTR_MASK 0x00000004
extern int i2c_dw_init(struct dw_i2c_dev *dev);
extern void i2c_dw_disable(struct dw_i2c_dev *dev);
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 809579ecb5a4..6b00061c3746 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -93,6 +93,7 @@ static void dw_i2c_acpi_params(struct platform_device *pdev, char method[],
static int dw_i2c_acpi_configure(struct platform_device *pdev)
{
struct dw_i2c_dev *dev = platform_get_drvdata(pdev);
+ const struct acpi_device_id *id;
dev->adapter.nr = -1;
dev->tx_fifo_depth = 32;
@@ -106,6 +107,10 @@ static int dw_i2c_acpi_configure(struct platform_device *pdev)
dw_i2c_acpi_params(pdev, "FMCN", &dev->fs_hcnt, &dev->fs_lcnt,
&dev->sda_hold_time);
+ id = acpi_match_device(pdev->dev.driver->acpi_match_table, &pdev->dev);
+ if (id && id->driver_data)
+ dev->accessor_flags |= (u32)id->driver_data;
+
return 0;
}
@@ -116,7 +121,7 @@ static const struct acpi_device_id dw_i2c_acpi_match[] = {
{ "INT3433", 0 },
{ "80860F41", 0 },
{ "808622C1", 0 },
- { "AMD0010", 0 },
+ { "AMD0010", ACCESS_INTR_MASK },
{ }
};
MODULE_DEVICE_TABLE(acpi, dw_i2c_acpi_match);
@@ -240,12 +245,10 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
}
r = i2c_dw_probe(dev);
- if (r) {
+ if (r && !dev->pm_runtime_disabled)
pm_runtime_disable(&pdev->dev);
- return r;
- }
- return 0;
+ return r;
}
static int dw_i2c_plat_remove(struct platform_device *pdev)
@@ -260,7 +263,8 @@ static int dw_i2c_plat_remove(struct platform_device *pdev)
pm_runtime_dont_use_autosuspend(&pdev->dev);
pm_runtime_put_sync(&pdev->dev);
- pm_runtime_disable(&pdev->dev);
+ if (!dev->pm_runtime_disabled)
+ pm_runtime_disable(&pdev->dev);
return 0;
}
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index c306751ceadb..f62d69799a9c 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -62,6 +62,8 @@
* Sunrise Point-LP (PCH) 0x9d23 32 hard yes yes yes
* DNV (SOC) 0x19df 32 hard yes yes yes
* Broxton (SOC) 0x5ad4 32 hard yes yes yes
+ * Lewisburg (PCH) 0xa1a3 32 hard yes yes yes
+ * Lewisburg Supersku (PCH) 0xa223 32 hard yes yes yes
*
* Features supported by this driver:
* Software PEC no
@@ -206,6 +208,8 @@
#define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS 0x9d23
#define PCI_DEVICE_ID_INTEL_DNV_SMBUS 0x19df
#define PCI_DEVICE_ID_INTEL_BROXTON_SMBUS 0x5ad4
+#define PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS 0xa1a3
+#define PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS 0xa223
struct i801_mux_config {
char *gpio_chip;
@@ -869,6 +873,8 @@ static const struct pci_device_id i801_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DNV_SMBUS) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROXTON_SMBUS) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS) },
{ 0, }
};
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 1e4d99da4164..d4d853680ae4 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -50,6 +50,7 @@
#include <linux/of_device.h>
#include <linux/of_dma.h>
#include <linux/of_gpio.h>
+#include <linux/pinctrl/consumer.h>
#include <linux/platform_data/i2c-imx.h>
#include <linux/platform_device.h>
#include <linux/sched.h>
@@ -1118,6 +1119,8 @@ static int i2c_imx_probe(struct platform_device *pdev)
i2c_imx, IMX_I2C_I2CR);
imx_i2c_write_reg(i2c_imx->hwdata->i2sr_clr_opcode, i2c_imx, IMX_I2C_I2SR);
+ i2c_imx_init_recovery_info(i2c_imx, pdev);
+
/* Add I2C adapter */
ret = i2c_add_numbered_adapter(&i2c_imx->adapter);
if (ret < 0) {
@@ -1125,8 +1128,6 @@ static int i2c_imx_probe(struct platform_device *pdev)
goto clk_disable;
}
- i2c_imx_init_recovery_info(i2c_imx, pdev);
-
/* Set up platform driver data */
platform_set_drvdata(pdev, i2c_imx);
clk_disable_unprepare(i2c_imx->clk);
diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
index 5801227b97ab..43207f52e5a3 100644
--- a/drivers/i2c/busses/i2c-mv64xxx.c
+++ b/drivers/i2c/busses/i2c-mv64xxx.c
@@ -146,6 +146,8 @@ struct mv64xxx_i2c_data {
bool errata_delay;
struct reset_control *rstc;
bool irq_clear_inverted;
+ /* Clk div is 2 to the power n, not 2 to the power n + 1 */
+ bool clk_n_base_0;
};
static struct mv64xxx_i2c_regs mv64xxx_i2c_regs_mv64xxx = {
@@ -757,25 +759,29 @@ MODULE_DEVICE_TABLE(of, mv64xxx_i2c_of_match_table);
#ifdef CONFIG_OF
#ifdef CONFIG_HAVE_CLK
static int
-mv64xxx_calc_freq(const int tclk, const int n, const int m)
+mv64xxx_calc_freq(struct mv64xxx_i2c_data *drv_data,
+ const int tclk, const int n, const int m)
{
- return tclk / (10 * (m + 1) * (2 << n));
+ if (drv_data->clk_n_base_0)
+ return tclk / (10 * (m + 1) * (1 << n));
+ else
+ return tclk / (10 * (m + 1) * (2 << n));
}
static bool
-mv64xxx_find_baud_factors(const u32 req_freq, const u32 tclk, u32 *best_n,
- u32 *best_m)
+mv64xxx_find_baud_factors(struct mv64xxx_i2c_data *drv_data,
+ const u32 req_freq, const u32 tclk)
{
int freq, delta, best_delta = INT_MAX;
int m, n;
for (n = 0; n <= 7; n++)
for (m = 0; m <= 15; m++) {
- freq = mv64xxx_calc_freq(tclk, n, m);
+ freq = mv64xxx_calc_freq(drv_data, tclk, n, m);
delta = req_freq - freq;
if (delta >= 0 && delta < best_delta) {
- *best_m = m;
- *best_n = n;
+ drv_data->freq_m = m;
+ drv_data->freq_n = n;
best_delta = delta;
}
if (best_delta == 0)
@@ -813,8 +819,11 @@ mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data,
if (of_property_read_u32(np, "clock-frequency", &bus_freq))
bus_freq = 100000; /* 100kHz by default */
- if (!mv64xxx_find_baud_factors(bus_freq, tclk,
- &drv_data->freq_n, &drv_data->freq_m)) {
+ if (of_device_is_compatible(np, "allwinner,sun4i-a10-i2c") ||
+ of_device_is_compatible(np, "allwinner,sun6i-a31-i2c"))
+ drv_data->clk_n_base_0 = true;
+
+ if (!mv64xxx_find_baud_factors(drv_data, bus_freq, tclk)) {
rc = -EINVAL;
goto out;
}
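The effect of clk_n_base_0 is easiest to see numerically. A small sketch of the two formulas from mv64xxx_calc_freq(); the 24MHz tclk and the n/m factors are illustrative:

#include <stdio.h>

/* SCL frequency for divider factors n and m, in the two variants
 * handled above: 2^n (Allwinner) versus 2^(n+1) (classic mv64xxx). */
static int calc_freq(int tclk, int n, int m, int base0)
{
        return tclk / (10 * (m + 1) * ((base0 ? 1 : 2) << n));
}

int main(void)
{
        int tclk = 24000000;    /* illustrative input clock */

        /* The same n/m give twice the bus speed when the divider is 2^n. */
        printf("n=2 m=11, 2^(n+1): %d Hz\n", calc_freq(tclk, 2, 11, 0));
        printf("n=2 m=11, 2^n:     %d Hz\n", calc_freq(tclk, 2, 11, 1));
        return 0;
}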
diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index b0ae560b38c3..599c0d7bd906 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -576,7 +576,7 @@ static int rcar_reg_slave(struct i2c_client *slave)
if (slave->flags & I2C_CLIENT_TEN)
return -EAFNOSUPPORT;
- pm_runtime_forbid(rcar_i2c_priv_to_dev(priv));
+ pm_runtime_get_sync(rcar_i2c_priv_to_dev(priv));
priv->slave = slave;
rcar_i2c_write(priv, ICSAR, slave->addr);
@@ -598,7 +598,7 @@ static int rcar_unreg_slave(struct i2c_client *slave)
priv->slave = NULL;
- pm_runtime_allow(rcar_i2c_priv_to_dev(priv));
+ pm_runtime_put(rcar_i2c_priv_to_dev(priv));
return 0;
}
diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c
index c1935ebd6a9c..9096d17beb5b 100644
--- a/drivers/i2c/busses/i2c-rk3x.c
+++ b/drivers/i2c/busses/i2c-rk3x.c
@@ -908,7 +908,7 @@ static int rk3x_i2c_probe(struct platform_device *pdev)
&i2c->scl_fall_ns))
i2c->scl_fall_ns = 300;
if (of_property_read_u32(pdev->dev.of_node, "i2c-sda-falling-time-ns",
- &i2c->scl_fall_ns))
+ &i2c->sda_fall_ns))
i2c->sda_fall_ns = i2c->scl_fall_ns;
strlcpy(i2c->adap.name, "rk3x-i2c", sizeof(i2c->adap.name));
diff --git a/drivers/i2c/busses/i2c-st.c b/drivers/i2c/busses/i2c-st.c
index ea72dca32fdf..25020ec777c9 100644
--- a/drivers/i2c/busses/i2c-st.c
+++ b/drivers/i2c/busses/i2c-st.c
@@ -822,7 +822,7 @@ static int st_i2c_probe(struct platform_device *pdev)
adap = &i2c_dev->adap;
i2c_set_adapdata(adap, i2c_dev);
- snprintf(adap->name, sizeof(adap->name), "ST I2C(0x%pa)", &res->start);
+ snprintf(adap->name, sizeof(adap->name), "ST I2C(%pa)", &res->start);
adap->owner = THIS_MODULE;
adap->timeout = 2 * HZ;
adap->retries = 0;
diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index e23a7b068c60..0b20449e48cf 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -662,8 +662,10 @@ static void __xiic_start_xfer(struct xiic_i2c *i2c)
static void xiic_start_xfer(struct xiic_i2c *i2c)
{
-
+ spin_lock(&i2c->lock);
+ xiic_reinit(i2c);
__xiic_start_xfer(i2c);
+ spin_unlock(&i2c->lock);
}
static int xiic_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 040af5cc8143..ba8eb087f224 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -715,7 +715,7 @@ static int i2c_device_probe(struct device *dev)
if (wakeirq > 0 && wakeirq != client->irq)
status = dev_pm_set_dedicated_wake_irq(dev, wakeirq);
else if (client->irq > 0)
- status = dev_pm_set_wake_irq(dev, wakeirq);
+ status = dev_pm_set_wake_irq(dev, client->irq);
else
status = 0;
diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c
index eea0c79111e7..4d960d3b93c0 100644
--- a/drivers/iio/adc/ad7793.c
+++ b/drivers/iio/adc/ad7793.c
@@ -101,7 +101,7 @@
#define AD7795_CH_AIN1M_AIN1M 8 /* AIN1(-) - AIN1(-) */
/* ID Register Bit Designations (AD7793_REG_ID) */
-#define AD7785_ID 0xB
+#define AD7785_ID 0x3
#define AD7792_ID 0xA
#define AD7793_ID 0xB
#define AD7794_ID 0xF
diff --git a/drivers/iio/adc/qcom-spmi-vadc.c b/drivers/iio/adc/qcom-spmi-vadc.c
index 0c4618b4d515..c2babe50a0d8 100644
--- a/drivers/iio/adc/qcom-spmi-vadc.c
+++ b/drivers/iio/adc/qcom-spmi-vadc.c
@@ -839,8 +839,10 @@ static int vadc_get_dt_data(struct vadc_priv *vadc, struct device_node *node)
for_each_available_child_of_node(node, child) {
ret = vadc_get_dt_channel_data(vadc->dev, &prop, child);
- if (ret)
+ if (ret) {
+ of_node_put(child);
return ret;
+ }
vadc->chan_props[index] = prop;
diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c
index 599cde3d03a1..b10f629cc44b 100644
--- a/drivers/iio/adc/vf610_adc.c
+++ b/drivers/iio/adc/vf610_adc.c
@@ -106,6 +106,13 @@
#define DEFAULT_SAMPLE_TIME 1000
+/* V at 25°C of 696 mV */
+#define VF610_VTEMP25_3V0 950
+/* V at 25°C of 699 mV */
+#define VF610_VTEMP25_3V3 867
+/* Typical sensor slope coefficient at all temperatures */
+#define VF610_TEMP_SLOPE_COEFF 1840
+
enum clk_sel {
VF610_ADCIOC_BUSCLK_SET,
VF610_ADCIOC_ALTCLK_SET,
@@ -197,6 +204,8 @@ static inline void vf610_adc_calculate_rates(struct vf610_adc *info)
adc_feature->clk_div = 8;
}
+ adck_rate = ipg_rate / adc_feature->clk_div;
+
/*
* Determine the long sample time adder value to be used based
* on the default minimum sample time provided.
@@ -221,7 +230,6 @@ static inline void vf610_adc_calculate_rates(struct vf610_adc *info)
* BCT (Base Conversion Time): fixed to 25 ADCK cycles for 12 bit mode
* LSTAdder(Long Sample Time): 3, 5, 7, 9, 13, 17, 21, 25 ADCK cycles
*/
- adck_rate = ipg_rate / info->adc_feature.clk_div;
for (i = 0; i < ARRAY_SIZE(vf610_hw_avgs); i++)
info->sample_freq_avail[i] =
adck_rate / (6 + vf610_hw_avgs[i] *
@@ -663,11 +671,13 @@ static int vf610_read_raw(struct iio_dev *indio_dev,
break;
case IIO_TEMP:
/*
- * Calculate in degree Celsius times 1000
- * Using sensor slope of 1.84 mV/°C and
- * V at 25°C of 696 mV
- */
- *val = 25000 - ((int)info->value - 864) * 1000000 / 1840;
+ * Calculate in degrees Celsius times 1000,
+ * using the typical sensor slope of 1.84 mV/°C
+ * and, with VREFH_ADC at 3.3V, V at 25°C of 699 mV.
+ */
+ *val = 25000 - ((int)info->value - VF610_VTEMP25_3V3) *
+ 1000000 / VF610_TEMP_SLOPE_COEFF;
+
break;
default:
mutex_unlock(&indio_dev->mlock);
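A worked instance of the corrected expression, using the constants introduced above; the raw reading is made up:

#include <stdio.h>

#define VF610_VTEMP25_3V3       867     /* ADC counts at 25°C, VREFH_ADC = 3.3V */
#define VF610_TEMP_SLOPE_COEFF  1840    /* 1.84 mV/°C, scaled */

int main(void)
{
        int raw = 880;  /* illustrative temperature-channel reading */
        int mC = 25000 - (raw - VF610_VTEMP25_3V3) * 1000000 /
                 VF610_TEMP_SLOPE_COEFF;

        printf("%d counts -> %d m°C (%d.%03d °C)\n",
               raw, mC, mC / 1000, mC % 1000);
        return 0;
}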
diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c
index 0370624a35db..02e636a1c49a 100644
--- a/drivers/iio/adc/xilinx-xadc-core.c
+++ b/drivers/iio/adc/xilinx-xadc-core.c
@@ -841,6 +841,7 @@ static int xadc_read_raw(struct iio_dev *indio_dev,
case XADC_REG_VCCINT:
case XADC_REG_VCCAUX:
case XADC_REG_VREFP:
+ case XADC_REG_VREFN:
case XADC_REG_VCCBRAM:
case XADC_REG_VCCPINT:
case XADC_REG_VCCPAUX:
diff --git a/drivers/iio/dac/ad5064.c b/drivers/iio/dac/ad5064.c
index 9e4d2c18b554..81ca0081a019 100644
--- a/drivers/iio/dac/ad5064.c
+++ b/drivers/iio/dac/ad5064.c
@@ -113,12 +113,16 @@ enum ad5064_type {
ID_AD5065,
ID_AD5628_1,
ID_AD5628_2,
+ ID_AD5629_1,
+ ID_AD5629_2,
ID_AD5648_1,
ID_AD5648_2,
ID_AD5666_1,
ID_AD5666_2,
ID_AD5668_1,
ID_AD5668_2,
+ ID_AD5669_1,
+ ID_AD5669_2,
};
static int ad5064_write(struct ad5064_state *st, unsigned int cmd,
@@ -291,7 +295,7 @@ static const struct iio_chan_spec_ext_info ad5064_ext_info[] = {
{ },
};
-#define AD5064_CHANNEL(chan, addr, bits) { \
+#define AD5064_CHANNEL(chan, addr, bits, _shift) { \
.type = IIO_VOLTAGE, \
.indexed = 1, \
.output = 1, \
@@ -303,36 +307,39 @@ static const struct iio_chan_spec_ext_info ad5064_ext_info[] = {
.sign = 'u', \
.realbits = (bits), \
.storagebits = 16, \
- .shift = 20 - bits, \
+ .shift = (_shift), \
}, \
.ext_info = ad5064_ext_info, \
}
-#define DECLARE_AD5064_CHANNELS(name, bits) \
+#define DECLARE_AD5064_CHANNELS(name, bits, shift) \
const struct iio_chan_spec name[] = { \
- AD5064_CHANNEL(0, 0, bits), \
- AD5064_CHANNEL(1, 1, bits), \
- AD5064_CHANNEL(2, 2, bits), \
- AD5064_CHANNEL(3, 3, bits), \
- AD5064_CHANNEL(4, 4, bits), \
- AD5064_CHANNEL(5, 5, bits), \
- AD5064_CHANNEL(6, 6, bits), \
- AD5064_CHANNEL(7, 7, bits), \
+ AD5064_CHANNEL(0, 0, bits, shift), \
+ AD5064_CHANNEL(1, 1, bits, shift), \
+ AD5064_CHANNEL(2, 2, bits, shift), \
+ AD5064_CHANNEL(3, 3, bits, shift), \
+ AD5064_CHANNEL(4, 4, bits, shift), \
+ AD5064_CHANNEL(5, 5, bits, shift), \
+ AD5064_CHANNEL(6, 6, bits, shift), \
+ AD5064_CHANNEL(7, 7, bits, shift), \
}
-#define DECLARE_AD5065_CHANNELS(name, bits) \
+#define DECLARE_AD5065_CHANNELS(name, bits, shift) \
const struct iio_chan_spec name[] = { \
- AD5064_CHANNEL(0, 0, bits), \
- AD5064_CHANNEL(1, 3, bits), \
+ AD5064_CHANNEL(0, 0, bits, shift), \
+ AD5064_CHANNEL(1, 3, bits, shift), \
}
-static DECLARE_AD5064_CHANNELS(ad5024_channels, 12);
-static DECLARE_AD5064_CHANNELS(ad5044_channels, 14);
-static DECLARE_AD5064_CHANNELS(ad5064_channels, 16);
+static DECLARE_AD5064_CHANNELS(ad5024_channels, 12, 8);
+static DECLARE_AD5064_CHANNELS(ad5044_channels, 14, 6);
+static DECLARE_AD5064_CHANNELS(ad5064_channels, 16, 4);
-static DECLARE_AD5065_CHANNELS(ad5025_channels, 12);
-static DECLARE_AD5065_CHANNELS(ad5045_channels, 14);
-static DECLARE_AD5065_CHANNELS(ad5065_channels, 16);
+static DECLARE_AD5065_CHANNELS(ad5025_channels, 12, 8);
+static DECLARE_AD5065_CHANNELS(ad5045_channels, 14, 6);
+static DECLARE_AD5065_CHANNELS(ad5065_channels, 16, 4);
+
+static DECLARE_AD5064_CHANNELS(ad5629_channels, 12, 4);
+static DECLARE_AD5064_CHANNELS(ad5669_channels, 16, 0);
static const struct ad5064_chip_info ad5064_chip_info_tbl[] = {
[ID_AD5024] = {
@@ -382,6 +389,18 @@ static const struct ad5064_chip_info ad5064_chip_info_tbl[] = {
.channels = ad5024_channels,
.num_channels = 8,
},
+ [ID_AD5629_1] = {
+ .shared_vref = true,
+ .internal_vref = 2500000,
+ .channels = ad5629_channels,
+ .num_channels = 8,
+ },
+ [ID_AD5629_2] = {
+ .shared_vref = true,
+ .internal_vref = 5000000,
+ .channels = ad5629_channels,
+ .num_channels = 8,
+ },
[ID_AD5648_1] = {
.shared_vref = true,
.internal_vref = 2500000,
@@ -418,6 +437,18 @@ static const struct ad5064_chip_info ad5064_chip_info_tbl[] = {
.channels = ad5064_channels,
.num_channels = 8,
},
+ [ID_AD5669_1] = {
+ .shared_vref = true,
+ .internal_vref = 2500000,
+ .channels = ad5669_channels,
+ .num_channels = 8,
+ },
+ [ID_AD5669_2] = {
+ .shared_vref = true,
+ .internal_vref = 5000000,
+ .channels = ad5669_channels,
+ .num_channels = 8,
+ },
};
static inline unsigned int ad5064_num_vref(struct ad5064_state *st)
@@ -597,10 +628,16 @@ static int ad5064_i2c_write(struct ad5064_state *st, unsigned int cmd,
unsigned int addr, unsigned int val)
{
struct i2c_client *i2c = to_i2c_client(st->dev);
+ int ret;
st->data.i2c[0] = (cmd << 4) | addr;
put_unaligned_be16(val, &st->data.i2c[1]);
- return i2c_master_send(i2c, st->data.i2c, 3);
+
+ ret = i2c_master_send(i2c, st->data.i2c, 3);
+ if (ret < 0)
+ return ret;
+
+ return 0;
}
static int ad5064_i2c_probe(struct i2c_client *i2c,
@@ -616,12 +653,12 @@ static int ad5064_i2c_remove(struct i2c_client *i2c)
}
static const struct i2c_device_id ad5064_i2c_ids[] = {
- {"ad5629-1", ID_AD5628_1},
- {"ad5629-2", ID_AD5628_2},
- {"ad5629-3", ID_AD5628_2}, /* similar enough to ad5629-2 */
- {"ad5669-1", ID_AD5668_1},
- {"ad5669-2", ID_AD5668_2},
- {"ad5669-3", ID_AD5668_2}, /* similar enough to ad5669-2 */
+ {"ad5629-1", ID_AD5629_1},
+ {"ad5629-2", ID_AD5629_2},
+ {"ad5629-3", ID_AD5629_2}, /* similar enough to ad5629-2 */
+ {"ad5669-1", ID_AD5669_1},
+ {"ad5669-2", ID_AD5669_2},
+ {"ad5669-3", ID_AD5669_2}, /* similar enough to ad5669-2 */
{}
};
MODULE_DEVICE_TABLE(i2c, ad5064_i2c_ids);
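The per-chip shift exists because the SPI parts left-justify the DAC code in a 20-bit data field (hence 8/6/4 for 12/14/16 bits), while the I2C AD5629/AD5669 use a 16-bit field (hence 4 and 0). A small check of that alignment; the field widths are my reading of the shift values above, not a datasheet quote:

#include <stdio.h>

/* Left-justify a raw DAC code of 'bits' resolution inside a data
 * field of 'field' bits, as the .shift values above encode. */
static unsigned int dac_word(unsigned int code, int bits, int field)
{
        return code << (field - bits);
}

int main(void)
{
        /* AD5024 (SPI, 20-bit field) vs AD5629 (I2C, 16-bit field),
         * both 12-bit parts writing mid-scale. */
        printf("ad5024 word: 0x%05x (shift %d)\n",
               dac_word(0x800, 12, 20), 20 - 12);
        printf("ad5629 word: 0x%04x (shift %d)\n",
               dac_word(0x800, 12, 16), 16 - 12);
        return 0;
}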
diff --git a/drivers/iio/humidity/si7020.c b/drivers/iio/humidity/si7020.c
index 12128d1ca570..71991b5c0658 100644
--- a/drivers/iio/humidity/si7020.c
+++ b/drivers/iio/humidity/si7020.c
@@ -50,10 +50,10 @@ static int si7020_read_raw(struct iio_dev *indio_dev,
switch (mask) {
case IIO_CHAN_INFO_RAW:
- ret = i2c_smbus_read_word_data(*client,
- chan->type == IIO_TEMP ?
- SI7020CMD_TEMP_HOLD :
- SI7020CMD_RH_HOLD);
+ ret = i2c_smbus_read_word_swapped(*client,
+ chan->type == IIO_TEMP ?
+ SI7020CMD_TEMP_HOLD :
+ SI7020CMD_RH_HOLD);
if (ret < 0)
return ret;
*val = ret >> 2;
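The swap is needed because i2c_smbus_read_word_data() returns the SMBus little-endian byte order, while the Si7020 transmits its measurement MSB first. A sketch of the difference, plus the relative-humidity conversion commonly given for this sensor family; treat the conversion constants as an assumption, they are not part of this patch:

#include <stdio.h>
#include <stdint.h>

/* The Si7020 sends measurements MSB first; SMBus word reads are
 * LSB first, so the bytes have to be swapped before use. */
static uint16_t smbus_word_to_be(uint16_t w)
{
        return (uint16_t)((w << 8) | (w >> 8));
}

int main(void)
{
        uint16_t wire = 0x6693; /* as i2c_smbus_read_word_data() sees it */
        uint16_t code = smbus_word_to_be(wire); /* 0x9366, the real code */

        /* Datasheet-style conversion (assumption, not from this patch):
         * %RH = 125 * code / 65536 - 6 */
        printf("code=0x%04x -> %.1f %%RH\n", code, 125.0 * code / 65536 - 6);
        return 0;
}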
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index d7e908acb480..0f6f63b20263 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -302,7 +302,7 @@ static int iio_scan_mask_set(struct iio_dev *indio_dev,
if (trialmask == NULL)
return -ENOMEM;
if (!indio_dev->masklength) {
- WARN_ON("Trying to set scanmask prior to registering buffer\n");
+ WARN(1, "Trying to set scanmask prior to registering buffer\n");
goto err_invalid_mask;
}
bitmap_copy(trialmask, buffer->scan_mask, indio_dev->masklength);
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 208358f9e7e3..159ede61f793 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -655,7 +655,7 @@ int __iio_device_attr_init(struct device_attribute *dev_attr,
break;
case IIO_SEPARATE:
if (!chan->indexed) {
- WARN_ON("Differential channels must be indexed\n");
+ WARN(1, "Differential channels must be indexed\n");
ret = -EINVAL;
goto error_free_full_postfix;
}
diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c
index 7d269ef9e062..f6a07dc32ae4 100644
--- a/drivers/iio/light/apds9960.c
+++ b/drivers/iio/light/apds9960.c
@@ -453,6 +453,7 @@ static int apds9960_set_power_state(struct apds9960_data *data, bool on)
usleep_range(data->als_adc_int_us,
APDS9960_MAX_INT_TIME_IN_US);
} else {
+ pm_runtime_mark_last_busy(dev);
ret = pm_runtime_put_autosuspend(dev);
}
diff --git a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
index 961f9f990faf..e544fcfd5ced 100644
--- a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
+++ b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
@@ -130,10 +130,10 @@ static int lidar_get_measurement(struct lidar_data *data, u16 *reg)
if (ret < 0)
break;
- /* return 0 since laser is likely pointed out of range */
+ /* return -EINVAL since laser is likely pointed out of range */
if (ret & LIDAR_REG_STATUS_INVALID) {
*reg = 0;
- ret = 0;
+ ret = -EINVAL;
break;
}
@@ -197,7 +197,7 @@ static irqreturn_t lidar_trigger_handler(int irq, void *private)
if (!ret) {
iio_push_to_buffers_with_timestamp(indio_dev, data->buffer,
iio_get_time_ns());
- } else {
+ } else if (ret != -EINVAL) {
dev_err(&data->client->dev, "cannot read LIDAR measurement");
}
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 944cd90417bc..2d762a2ecd81 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1126,10 +1126,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev,
rcu_read_lock();
err = fib_lookup(dev_net(net_dev), &fl4, &res, 0);
- if (err)
- return false;
-
- ret = FIB_RES_DEV(res) == net_dev;
+ ret = err == 0 && FIB_RES_DEV(res) == net_dev;
rcu_read_unlock();
return ret;
@@ -1268,15 +1265,17 @@ static bool cma_protocol_roce(const struct rdma_cm_id *id)
return cma_protocol_roce_dev_port(device, port_num);
}
-static bool cma_match_net_dev(const struct rdma_id_private *id_priv,
- const struct net_device *net_dev)
+static bool cma_match_net_dev(const struct rdma_cm_id *id,
+ const struct net_device *net_dev,
+ u8 port_num)
{
- const struct rdma_addr *addr = &id_priv->id.route.addr;
+ const struct rdma_addr *addr = &id->route.addr;
if (!net_dev)
/* This request is an AF_IB request or a RoCE request */
- return addr->src_addr.ss_family == AF_IB ||
- cma_protocol_roce(&id_priv->id);
+ return (!id->port_num || id->port_num == port_num) &&
+ (addr->src_addr.ss_family == AF_IB ||
+ cma_protocol_roce_dev_port(id->device, port_num));
return !addr->dev_addr.bound_dev_if ||
(net_eq(dev_net(net_dev), addr->dev_addr.net) &&
@@ -1298,13 +1297,13 @@ static struct rdma_id_private *cma_find_listener(
hlist_for_each_entry(id_priv, &bind_list->owners, node) {
if (cma_match_private_data(id_priv, ib_event->private_data)) {
if (id_priv->id.device == cm_id->device &&
- cma_match_net_dev(id_priv, net_dev))
+ cma_match_net_dev(&id_priv->id, net_dev, req->port))
return id_priv;
list_for_each_entry(id_priv_dev,
&id_priv->listen_list,
listen_list) {
if (id_priv_dev->id.device == cm_id->device &&
- cma_match_net_dev(id_priv_dev, net_dev))
+ cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
return id_priv_dev;
}
}
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 8d8af7a41a30..2281de122038 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1811,6 +1811,11 @@ static int validate_mad(const struct ib_mad_hdr *mad_hdr,
if (qp_num == 0)
valid = 1;
} else {
+ /* CM attributes other than ClassPortInfo only use the Send method */
+ if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) &&
+ (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) &&
+ (mad_hdr->method != IB_MGMT_METHOD_SEND))
+ goto out;
/* Filter GSI packets sent to QP0 */
if (qp_num != 0)
valid = 1;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 2aba774f835b..a95a32ba596e 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -512,7 +512,7 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
return len;
}
-static int ib_nl_send_msg(struct ib_sa_query *query)
+static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
{
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
@@ -526,7 +526,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query)
if (len <= 0)
return -EMSGSIZE;
- skb = nlmsg_new(len, GFP_KERNEL);
+ skb = nlmsg_new(len, gfp_mask);
if (!skb)
return -ENOMEM;
@@ -544,7 +544,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query)
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
- ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL);
+ ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, gfp_mask);
if (!ret)
ret = len;
else
@@ -553,7 +553,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query)
return ret;
}
-static int ib_nl_make_request(struct ib_sa_query *query)
+static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
{
unsigned long flags;
unsigned long delay;
@@ -562,25 +562,27 @@ static int ib_nl_make_request(struct ib_sa_query *query)
INIT_LIST_HEAD(&query->list);
query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
+ /* Put the request on the list first. */
spin_lock_irqsave(&ib_nl_request_lock, flags);
- ret = ib_nl_send_msg(query);
- if (ret <= 0) {
- ret = -EIO;
- goto request_out;
- } else {
- ret = 0;
- }
-
delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
query->timeout = delay + jiffies;
list_add_tail(&query->list, &ib_nl_request_list);
/* Start the timeout if this is the only request */
if (ib_nl_request_list.next == &query->list)
queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
-
-request_out:
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
+ ret = ib_nl_send_msg(query, gfp_mask);
+ if (ret <= 0) {
+ ret = -EIO;
+ /* Remove the request */
+ spin_lock_irqsave(&ib_nl_request_lock, flags);
+ list_del(&query->list);
+ spin_unlock_irqrestore(&ib_nl_request_lock, flags);
+ } else {
+ ret = 0;
+ }
+
return ret;
}
@@ -1108,7 +1110,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) {
if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) {
- if (!ib_nl_make_request(query))
+ if (!ib_nl_make_request(query, gfp_mask))
return id;
}
ib_sa_disable_local_svc(query);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 94816aeb95a0..1c02deab068f 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -62,9 +62,11 @@ static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
* The ib_uobject locking scheme is as follows:
*
* - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
- * needs to be held during all idr operations. When an object is
+ * needs to be held during all idr write operations. When an object is
* looked up, a reference must be taken on the object's kref before
- * dropping this lock.
+ * dropping this lock. For read operations, holding the
+ * rcu_read_lock() is sufficient instead, but similarly the kref
+ * reference must be grabbed before the rcu_read_unlock().
*
* - Each object also has an rwsem. This rwsem must be held for
* reading while an operation that uses the object is performed.
@@ -96,7 +98,7 @@ static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
static void release_uobj(struct kref *kref)
{
- kfree(container_of(kref, struct ib_uobject, ref));
+ kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu);
}
static void put_uobj(struct ib_uobject *uobj)
@@ -145,7 +147,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
{
struct ib_uobject *uobj;
- spin_lock(&ib_uverbs_idr_lock);
+ rcu_read_lock();
uobj = idr_find(idr, id);
if (uobj) {
if (uobj->context == context)
@@ -153,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
else
uobj = NULL;
}
- spin_unlock(&ib_uverbs_idr_lock);
+ rcu_read_unlock();
return uobj;
}
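In isolation, the lookup pattern the hunk switches to: walk the idr under rcu_read_lock() and take a reference before the read-side section ends, so an object freed concurrently (released via kfree_rcu() above) can never be dereferenced. A kernel-style sketch with simplified types, using kref_get_unless_zero() to close the race with a final put; it shows the pattern, not the uverbs code:

#include <linux/idr.h>
#include <linux/kref.h>
#include <linux/rcupdate.h>

struct obj {
        struct kref ref;        /* last put frees via kfree_rcu() */
        struct rcu_head rcu;
};

static struct obj *obj_lookup(struct idr *idr, int id)
{
        struct obj *o;

        rcu_read_lock();
        o = idr_find(idr, id);
        /* Grab the reference before rcu_read_unlock(), while the
         * object is still guaranteed to be alive. */
        if (o && !kref_get_unless_zero(&o->ref))
                o = NULL;       /* raced with the final put */
        rcu_read_unlock();
        return o;
}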
@@ -2446,6 +2448,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
int i, sg_ind;
int is_ud;
ssize_t ret = -EINVAL;
+ size_t next_size;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
@@ -2490,7 +2493,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
goto out_put;
}
- ud = alloc_wr(sizeof(*ud), user_wr->num_sge);
+ next_size = sizeof(*ud);
+ ud = alloc_wr(next_size, user_wr->num_sge);
if (!ud) {
ret = -ENOMEM;
goto out_put;
@@ -2511,7 +2515,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
user_wr->opcode == IB_WR_RDMA_READ) {
struct ib_rdma_wr *rdma;
- rdma = alloc_wr(sizeof(*rdma), user_wr->num_sge);
+ next_size = sizeof(*rdma);
+ rdma = alloc_wr(next_size, user_wr->num_sge);
if (!rdma) {
ret = -ENOMEM;
goto out_put;
@@ -2525,7 +2530,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
struct ib_atomic_wr *atomic;
- atomic = alloc_wr(sizeof(*atomic), user_wr->num_sge);
+ next_size = sizeof(*atomic);
+ atomic = alloc_wr(next_size, user_wr->num_sge);
if (!atomic) {
ret = -ENOMEM;
goto out_put;
@@ -2540,7 +2546,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
} else if (user_wr->opcode == IB_WR_SEND ||
user_wr->opcode == IB_WR_SEND_WITH_IMM ||
user_wr->opcode == IB_WR_SEND_WITH_INV) {
- next = alloc_wr(sizeof(*next), user_wr->num_sge);
+ next_size = sizeof(*next);
+ next = alloc_wr(next_size, user_wr->num_sge);
if (!next) {
ret = -ENOMEM;
goto out_put;
@@ -2572,7 +2579,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (next->num_sge) {
next->sg_list = (void *) next +
- ALIGN(sizeof *next, sizeof (struct ib_sge));
+ ALIGN(next_size, sizeof(struct ib_sge));
if (copy_from_user(next->sg_list,
buf + sizeof cmd +
cmd.wr_count * cmd.wqe_size +
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 043a60ee6836..545906dec26d 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1516,7 +1516,7 @@ EXPORT_SYMBOL(ib_map_mr_sg);
* @sg_nents: number of entries in sg
* @set_page: driver page assignment function pointer
*
- * Core service helper for drivers to covert the largest
+ * Core service helper for drivers to convert the largest
* prefix of given sg list to a page vector. The sg list
* prefix converted is the prefix that meet the requirements
* of ib_map_mr_sg.
@@ -1533,7 +1533,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
u64 last_end_dma_addr = 0, last_page_addr = 0;
unsigned int last_page_off = 0;
u64 page_mask = ~((u64)mr->page_size - 1);
- int i;
+ int i, ret;
mr->iova = sg_dma_address(&sgl[0]);
mr->length = 0;
@@ -1544,27 +1544,29 @@ int ib_sg_to_pages(struct ib_mr *mr,
u64 end_dma_addr = dma_addr + dma_len;
u64 page_addr = dma_addr & page_mask;
- if (i && page_addr != dma_addr) {
- if (last_end_dma_addr != dma_addr) {
- /* gap */
- goto done;
-
- } else if (last_page_off + dma_len <= mr->page_size) {
- /* chunk this fragment with the last */
- mr->length += dma_len;
- last_end_dma_addr += dma_len;
- last_page_off += dma_len;
- continue;
- } else {
- /* map starting from the next page */
- page_addr = last_page_addr + mr->page_size;
- dma_len -= mr->page_size - last_page_off;
- }
+ /*
+ * For the second and later elements, check whether either the
+ * end of element i-1 or the start of element i is not aligned
+ * on a page boundary.
+ */
+ if (i && (last_page_off != 0 || page_addr != dma_addr)) {
+ /* Stop mapping if there is a gap. */
+ if (last_end_dma_addr != dma_addr)
+ break;
+
+ /*
+ * Coalesce this element with the last. If it is small
+ * enough just update mr->length. Otherwise start
+ * mapping from the next page.
+ */
+ goto next_page;
}
do {
- if (unlikely(set_page(mr, page_addr)))
- goto done;
+ ret = set_page(mr, page_addr);
+ if (unlikely(ret < 0))
+ return i ? : ret;
+next_page:
page_addr += mr->page_size;
} while (page_addr < end_dma_addr);
@@ -1574,7 +1576,6 @@ int ib_sg_to_pages(struct ib_mr *mr,
last_page_off = end_dma_addr & ~page_mask;
}
-done:
return i;
}
EXPORT_SYMBOL(ib_sg_to_pages);
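The new boundary test reduces to two checks: is there a gap between element i-1 and element i, and does either side of the joint fall mid-page. A standalone sketch with illustrative addresses:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t page_size = 4096;
        uint64_t page_mask = ~(page_size - 1);
        /* End of element i-1 and start of element i, as in the loop. */
        uint64_t last_end = 0x10001800; /* mid-page: last_page_off != 0 */
        uint64_t dma_addr = 0x10001800; /* contiguous, but unaligned */

        uint64_t page_addr = dma_addr & page_mask;
        int unaligned = (last_end & ~page_mask) != 0 || page_addr != dma_addr;
        int gap = last_end != dma_addr;

        printf("unaligned=%d gap=%d -> %s\n", unaligned, gap,
               unaligned ? (gap ? "stop mapping" : "coalesce") : "new pages");
        return 0;
}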
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index f567160a4a56..97d6878f9938 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -456,7 +456,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
props->max_sge = min(dev->dev->caps.max_sq_sg,
dev->dev->caps.max_rq_sg);
- props->max_sge_rd = props->max_sge;
+ props->max_sge_rd = MLX4_MAX_SGE_RD;
props->max_cq = dev->dev->quotas.cq;
props->max_cqe = dev->dev->caps.max_cqes;
props->max_mr = dev->dev->quotas.mpt;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index a2e4ca56da44..13eaaf45288f 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -34,6 +34,7 @@
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
@@ -795,8 +796,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_mtt;
- qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
- qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
+ qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+ if (!qp->sq.wrid)
+ qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
+ gfp, PAGE_KERNEL);
+ qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
+ if (!qp->rq.wrid)
+ qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
+ gfp, PAGE_KERNEL);
if (!qp->sq.wrid || !qp->rq.wrid) {
err = -ENOMEM;
goto err_wrid;
@@ -886,8 +893,8 @@ err_wrid:
if (qp_has_rq(init_attr))
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
} else {
- kfree(qp->sq.wrid);
- kfree(qp->rq.wrid);
+ kvfree(qp->sq.wrid);
+ kvfree(qp->rq.wrid);
}
err_mtt:
@@ -1062,8 +1069,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
&qp->db);
ib_umem_release(qp->umem);
} else {
- kfree(qp->sq.wrid);
- kfree(qp->rq.wrid);
+ kvfree(qp->sq.wrid);
+ kvfree(qp->rq.wrid);
if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
free_proxy_bufs(&dev->ib_dev, qp);
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index dce5dfe3a70e..c394376ebe06 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -34,6 +34,7 @@
#include <linux/mlx4/qp.h>
#include <linux/mlx4/srq.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include "mlx4_ib.h"
#include "user.h"
@@ -172,8 +173,12 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
if (!srq->wrid) {
- err = -ENOMEM;
- goto err_mtt;
+ srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
+ GFP_KERNEL, PAGE_KERNEL);
+ if (!srq->wrid) {
+ err = -ENOMEM;
+ goto err_mtt;
+ }
}
}
@@ -204,7 +209,7 @@ err_wrid:
if (pd->uobject)
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
else
- kfree(srq->wrid);
+ kvfree(srq->wrid);
err_mtt:
mlx4_mtt_cleanup(dev->dev, &srq->mtt);
@@ -281,7 +286,7 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq)
mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
ib_umem_release(msrq->umem);
} else {
- kfree(msrq->wrid);
+ kvfree(msrq->wrid);
mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
&msrq->buf);
mlx4_db_free(dev->dev, &msrq->db);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index ec8993a7b3be..6000f7aeede9 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -381,7 +381,19 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
}
}
} else if (ent->cur > 2 * ent->limit) {
- if (!someone_adding(cache) &&
+ /*
+ * The remove_keys() logic is performed as a garbage collection
+ * task. Such a task is intended to run only when no other active
+ * processes are running.
+ *
+ * need_resched() returns TRUE if there are user tasks to be
+ * activated in the near future.
+ *
+ * In that case we don't execute remove_keys() and postpone the
+ * garbage collection work, trying again in the next cycle, in
+ * order to free CPU resources for other tasks.
+ */
+ if (!need_resched() && !someone_adding(cache) &&
time_after(jiffies, cache->last_add + 300 * HZ)) {
remove_keys(dev, i, 1);
if (ent->cur > ent->limit)
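The same back-off idiom, stripped down to a delayed-work function: reclaim only when the CPU has nothing better to do, otherwise requeue and try again next cycle. A sketch of the policy, not the mlx5 cache code:

#include <linux/jiffies.h>
#include <linux/sched.h>
#include <linux/workqueue.h>

static void gc_work_func(struct work_struct *work);
static DECLARE_DELAYED_WORK(gc_work, gc_work_func);

static void gc_work_func(struct work_struct *work)
{
        /* User tasks are about to run: postpone the whole cycle. */
        if (need_resched()) {
                schedule_delayed_work(&gc_work, HZ);
                return;
        }
        /* ... otherwise perform the actual reclaim here ... */
}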
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index ae80590aabdf..040bb8b5cb15 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -232,6 +232,10 @@ struct phy_info {
u16 interface_type;
};
+enum ocrdma_flags {
+ OCRDMA_FLAGS_LINK_STATUS_INIT = 0x01
+};
+
struct ocrdma_dev {
struct ib_device ibdev;
struct ocrdma_dev_attr attr;
@@ -287,6 +291,7 @@ struct ocrdma_dev {
atomic_t update_sl;
u16 pvid;
u32 asic_id;
+ u32 flags;
ulong last_stats_time;
struct mutex stats_lock; /* provide synch for debugfs operations */
@@ -591,4 +596,9 @@ static inline u8 ocrdma_is_enabled_and_synced(u32 state)
(state & OCRDMA_STATE_FLAG_SYNC);
}
+static inline u8 ocrdma_get_ae_link_state(u32 ae_state)
+{
+ return ((ae_state & OCRDMA_AE_LSC_LS_MASK) >> OCRDMA_AE_LSC_LS_SHIFT);
+}
+
#endif
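How the new helper is meant to be used against the AE CQE layout added to ocrdma_sli.h further down (mask and shift values are copied from that hunk; the sample word is made up):

#include <stdio.h>
#include <stdint.h>

#define OCRDMA_AE_LSC_LS_SHIFT   0x08
#define OCRDMA_AE_LSC_LS_MASK    (0xFF << OCRDMA_AE_LSC_LS_SHIFT)
#define OCRDMA_AE_LSC_LLINK_MASK 0x02
#define OCRDMA_LINK_ST_MASK      0x01

int main(void)
{
        uint32_t speed_state_ptn = 0x00000300;  /* illustrative: state 0x03 */
        uint8_t lstate = (speed_state_ptn & OCRDMA_AE_LSC_LS_MASK) >>
                         OCRDMA_AE_LSC_LS_SHIFT;

        if (lstate & OCRDMA_AE_LSC_LLINK_MASK)  /* logical-link event */
                printf("logical link %s\n",
                       (lstate & OCRDMA_LINK_ST_MASK) ? "up" : "down");
        return 0;
}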
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 30f67bebffa3..283ca842ff74 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -579,6 +579,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE);
cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE);
+ /* Request link events on this MQ. */
+ cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_LINK_EVE_CODE);
cmd->async_cqid_ringsize = cq->id;
cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
@@ -819,20 +821,42 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
}
}
+static void ocrdma_process_link_state(struct ocrdma_dev *dev,
+ struct ocrdma_ae_mcqe *cqe)
+{
+ struct ocrdma_ae_lnkst_mcqe *evt;
+ u8 lstate;
+
+ evt = (struct ocrdma_ae_lnkst_mcqe *)cqe;
+ lstate = ocrdma_get_ae_link_state(evt->speed_state_ptn);
+
+ if (!(lstate & OCRDMA_AE_LSC_LLINK_MASK))
+ return;
+
+ if (dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT)
+ ocrdma_update_link_state(dev, (lstate & OCRDMA_LINK_ST_MASK));
+}
+
static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
{
/* async CQE processing */
struct ocrdma_ae_mcqe *cqe = ae_cqe;
u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >>
OCRDMA_AE_MCQE_EVENT_CODE_SHIFT;
-
- if (evt_code == OCRDMA_ASYNC_RDMA_EVE_CODE)
+ switch (evt_code) {
+ case OCRDMA_ASYNC_LINK_EVE_CODE:
+ ocrdma_process_link_state(dev, cqe);
+ break;
+ case OCRDMA_ASYNC_RDMA_EVE_CODE:
ocrdma_dispatch_ibevent(dev, cqe);
- else if (evt_code == OCRDMA_ASYNC_GRP5_EVE_CODE)
+ break;
+ case OCRDMA_ASYNC_GRP5_EVE_CODE:
ocrdma_process_grp5_aync(dev, cqe);
- else
+ break;
+ default:
pr_err("%s(%d) invalid evt code=0x%x\n", __func__,
dev->id, evt_code);
+ }
}
static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
@@ -1363,7 +1387,8 @@ mbx_err:
return status;
}
-int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
+int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed,
+ u8 *lnk_state)
{
int status = -ENOMEM;
struct ocrdma_get_link_speed_rsp *rsp;
@@ -1384,8 +1409,11 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
goto mbx_err;
rsp = (struct ocrdma_get_link_speed_rsp *)cmd;
- *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
- >> OCRDMA_PHY_PS_SHIFT;
+ if (lnk_speed)
+ *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
+ >> OCRDMA_PHY_PS_SHIFT;
+ if (lnk_state)
+ *lnk_state = (rsp->res_lnk_st & OCRDMA_LINK_ST_MASK);
mbx_err:
kfree(cmd);
@@ -2515,9 +2543,10 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
- if (vlan_id < 0x1000) {
- if (dev->pfc_state) {
- vlan_id = 0;
+ if (vlan_id == 0xFFFF)
+ vlan_id = 0;
+ if (vlan_id || dev->pfc_state) {
+ if (!vlan_id) {
pr_err("ocrdma%d:Using VLAN with PFC is recommended\n",
dev->id);
pr_err("ocrdma%d:Using VLAN 0 for this connection\n",
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
index 7ed885c1851e..ebc1f442aec3 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
@@ -106,7 +106,8 @@ void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed,
bool solicited, u16 cqe_popped);
/* verbs specific mailbox commands */
-int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed);
+int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed,
+ u8 *lnk_st);
int ocrdma_query_config(struct ocrdma_dev *,
struct ocrdma_mbx_query_config *config);
@@ -153,5 +154,6 @@ char *port_speed_string(struct ocrdma_dev *dev);
void ocrdma_init_service_level(struct ocrdma_dev *);
void ocrdma_alloc_pd_pool(struct ocrdma_dev *dev);
void ocrdma_free_pd_range(struct ocrdma_dev *dev);
+void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate);
#endif /* __OCRDMA_HW_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 62b7009daa6c..3afb40b85159 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -290,6 +290,7 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
{
int status = 0, i;
+ u8 lstate = 0;
struct ocrdma_dev *dev;
dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
@@ -319,6 +320,11 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
if (status)
goto alloc_err;
+ /* Query Link state and update */
+ status = ocrdma_mbx_get_link_speed(dev, NULL, &lstate);
+ if (!status)
+ ocrdma_update_link_state(dev, lstate);
+
for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
goto sysfs_err;
@@ -373,7 +379,7 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
ocrdma_remove_free(dev);
}
-static int ocrdma_open(struct ocrdma_dev *dev)
+static int ocrdma_dispatch_port_active(struct ocrdma_dev *dev)
{
struct ib_event port_event;
@@ -384,32 +390,9 @@ static int ocrdma_open(struct ocrdma_dev *dev)
return 0;
}
-static int ocrdma_close(struct ocrdma_dev *dev)
+static int ocrdma_dispatch_port_error(struct ocrdma_dev *dev)
{
- int i;
- struct ocrdma_qp *qp, **cur_qp;
struct ib_event err_event;
- struct ib_qp_attr attrs;
- int attr_mask = IB_QP_STATE;
-
- attrs.qp_state = IB_QPS_ERR;
- mutex_lock(&dev->dev_lock);
- if (dev->qp_tbl) {
- cur_qp = dev->qp_tbl;
- for (i = 0; i < OCRDMA_MAX_QP; i++) {
- qp = cur_qp[i];
- if (qp && qp->ibqp.qp_type != IB_QPT_GSI) {
- /* change the QP state to ERROR */
- _ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask);
-
- err_event.event = IB_EVENT_QP_FATAL;
- err_event.element.qp = &qp->ibqp;
- err_event.device = &dev->ibdev;
- ib_dispatch_event(&err_event);
- }
- }
- }
- mutex_unlock(&dev->dev_lock);
err_event.event = IB_EVENT_PORT_ERR;
err_event.element.port_num = 1;
@@ -420,7 +403,7 @@ static int ocrdma_close(struct ocrdma_dev *dev)
static void ocrdma_shutdown(struct ocrdma_dev *dev)
{
- ocrdma_close(dev);
+ ocrdma_dispatch_port_error(dev);
ocrdma_remove(dev);
}
@@ -431,18 +414,28 @@ static void ocrdma_shutdown(struct ocrdma_dev *dev)
static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
{
switch (event) {
- case BE_DEV_UP:
- ocrdma_open(dev);
- break;
- case BE_DEV_DOWN:
- ocrdma_close(dev);
- break;
case BE_DEV_SHUTDOWN:
ocrdma_shutdown(dev);
break;
+ default:
+ break;
}
}
+void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate)
+{
+ if (!(dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT)) {
+ dev->flags |= OCRDMA_FLAGS_LINK_STATUS_INIT;
+ if (!lstate)
+ return;
+ }
+
+ if (!lstate)
+ ocrdma_dispatch_port_error(dev);
+ else
+ ocrdma_dispatch_port_active(dev);
+}
+
static struct ocrdma_driver ocrdma_drv = {
.name = "ocrdma_driver",
.add = ocrdma_add,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 6a38268bbe9f..99dd6fdf06d7 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -465,8 +465,11 @@ struct ocrdma_ae_qp_mcqe {
u32 valid_ae_event;
};
-#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14
-#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5
+enum ocrdma_async_event_code {
+ OCRDMA_ASYNC_LINK_EVE_CODE = 0x01,
+ OCRDMA_ASYNC_GRP5_EVE_CODE = 0x05,
+ OCRDMA_ASYNC_RDMA_EVE_CODE = 0x14
+};
enum ocrdma_async_grp5_events {
OCRDMA_ASYNC_EVENT_QOS_VALUE = 0x01,
@@ -489,6 +492,44 @@ enum OCRDMA_ASYNC_EVENT_TYPE {
OCRDMA_MAX_ASYNC_ERRORS
};
+struct ocrdma_ae_lnkst_mcqe {
+ u32 speed_state_ptn;
+ u32 qos_reason_falut;
+ u32 evt_tag;
+ u32 valid_ae_event;
+};
+
+enum {
+ OCRDMA_AE_LSC_PORT_NUM_MASK = 0x3F,
+ OCRDMA_AE_LSC_PT_SHIFT = 0x06,
+ OCRDMA_AE_LSC_PT_MASK = (0x03 <<
+ OCRDMA_AE_LSC_PT_SHIFT),
+ OCRDMA_AE_LSC_LS_SHIFT = 0x08,
+ OCRDMA_AE_LSC_LS_MASK = (0xFF <<
+ OCRDMA_AE_LSC_LS_SHIFT),
+ OCRDMA_AE_LSC_LD_SHIFT = 0x10,
+ OCRDMA_AE_LSC_LD_MASK = (0xFF <<
+ OCRDMA_AE_LSC_LD_SHIFT),
+ OCRDMA_AE_LSC_PPS_SHIFT = 0x18,
+ OCRDMA_AE_LSC_PPS_MASK = (0xFF <<
+ OCRDMA_AE_LSC_PPS_SHIFT),
+ OCRDMA_AE_LSC_PPF_MASK = 0xFF,
+ OCRDMA_AE_LSC_ER_SHIFT = 0x08,
+ OCRDMA_AE_LSC_ER_MASK = (0xFF <<
+ OCRDMA_AE_LSC_ER_SHIFT),
+ OCRDMA_AE_LSC_QOS_SHIFT = 0x10,
+ OCRDMA_AE_LSC_QOS_MASK = (0xFFFF <<
+ OCRDMA_AE_LSC_QOS_SHIFT)
+};
+
+enum {
+ OCRDMA_AE_LSC_PLINK_DOWN = 0x00,
+ OCRDMA_AE_LSC_PLINK_UP = 0x01,
+ OCRDMA_AE_LSC_LLINK_DOWN = 0x02,
+ OCRDMA_AE_LSC_LLINK_MASK = 0x02,
+ OCRDMA_AE_LSC_LLINK_UP = 0x03
+};
+
/* mailbox command request and responses */
enum {
OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT = 2,
@@ -676,7 +717,7 @@ enum {
OCRDMA_PHY_PFLT_SHIFT = 0x18,
OCRDMA_QOS_LNKSP_MASK = 0xFFFF0000,
OCRDMA_QOS_LNKSP_SHIFT = 0x10,
- OCRDMA_LLST_MASK = 0xFF,
+ OCRDMA_LINK_ST_MASK = 0x01,
OCRDMA_PLFC_MASK = 0x00000400,
OCRDMA_PLFC_SHIFT = 0x8,
OCRDMA_PLRFC_MASK = 0x00000200,
@@ -691,7 +732,7 @@ struct ocrdma_get_link_speed_rsp {
u32 pflt_pps_ld_pnum;
u32 qos_lsp;
- u32 res_lls;
+ u32 res_lnk_st;
};
enum {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 583001bcfb8f..76e96f97b3f6 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -171,7 +171,7 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
int status;
u8 speed;
- status = ocrdma_mbx_get_link_speed(dev, &speed);
+ status = ocrdma_mbx_get_link_speed(dev, &speed, NULL);
if (status)
speed = OCRDMA_PHYS_LINK_SPEED_ZERO;
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c
index 5e27f76805e2..4c7c3c84a741 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.c
+++ b/drivers/infiniband/hw/qib/qib_qsfp.c
@@ -292,7 +292,7 @@ int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp)
qib_dev_porterr(ppd->dd, ppd->port,
"QSFP byte0 is 0x%02X, S/B 0x0C/D\n", peek[0]);
- if ((peek[2] & 2) == 0) {
+ if ((peek[2] & 4) == 0) {
/*
* If cable is paged, rather than "flat memory", we need to
* set the page to zero, Even if it already appears to be zero.
@@ -538,7 +538,7 @@ int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len)
sofar += scnprintf(buf + sofar, len - sofar, "Date:%.*s\n",
QSFP_DATE_LEN, cd.date);
sofar += scnprintf(buf + sofar, len - sofar, "Lot:%.*s\n",
- QSFP_LOT_LEN, cd.date);
+ QSFP_LOT_LEN, cd.lot);
while (bidx < QSFP_DEFAULT_HDR_CNT) {
int iidx;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 2baf5ad251ed..bc803f33d5f6 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -329,9 +329,9 @@ struct qib_sge {
struct qib_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
- struct qib_mregion mr; /* must be last */
u64 *pages;
u32 npages;
+ struct qib_mregion mr; /* must be last */
};
/*
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index a93070210109..42f4da620f2e 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -1293,7 +1293,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
sector_t sector_off = mr_status.sig_err.sig_err_offset;
- do_div(sector_off, sector_size + 8);
+ sector_div(sector_off, sector_size + 8);
*sector = scsi_get_lba(iser_task->sc) + sector_off;
pr_err("PI error found type %d at sector %llx "
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index dfbbbb28090b..8a51c3b5d657 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -157,16 +157,9 @@ isert_create_qp(struct isert_conn *isert_conn,
attr.recv_cq = comp->cq;
attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS;
attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
- /*
- * FIXME: Use devattr.max_sge - 2 for max_send_sge as
- * work-around for RDMA_READs with ConnectX-2.
- *
- * Also, still make sure to have at least two SGEs for
- * outgoing control PDU responses.
- */
- attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2);
- isert_conn->max_sge = attr.cap.max_send_sge;
-
+ attr.cap.max_send_sge = device->dev_attr.max_sge;
+ isert_conn->max_sge = min(device->dev_attr.max_sge,
+ device->dev_attr.max_sge_rd);
attr.cap.max_recv_sge = 1;
attr.sq_sig_type = IB_SIGNAL_REQ_WR;
attr.qp_type = IB_QPT_RC;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 9909022dc6c3..3db9a659719b 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -488,7 +488,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
struct ib_qp *qp;
struct ib_fmr_pool *fmr_pool = NULL;
struct srp_fr_pool *fr_pool = NULL;
- const int m = 1 + dev->use_fast_reg;
+ const int m = dev->use_fast_reg ? 3 : 1;
struct ib_cq_init_attr cq_attr = {};
int ret;
@@ -994,16 +994,16 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
ret = srp_lookup_path(ch);
if (ret)
- return ret;
+ goto out;
while (1) {
init_completion(&ch->done);
ret = srp_send_req(ch, multich);
if (ret)
- return ret;
+ goto out;
ret = wait_for_completion_interruptible(&ch->done);
if (ret < 0)
- return ret;
+ goto out;
/*
* The CM event handling code will set status to
@@ -1011,15 +1011,16 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
* back, or SRP_DLID_REDIRECT if we get a lid/qp
* redirect REJ back.
*/
- switch (ch->status) {
+ ret = ch->status;
+ switch (ret) {
case 0:
ch->connected = true;
- return 0;
+ goto out;
case SRP_PORT_REDIRECT:
ret = srp_lookup_path(ch);
if (ret)
- return ret;
+ goto out;
break;
case SRP_DLID_REDIRECT:
@@ -1028,13 +1029,16 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
case SRP_STALE_CONN:
shost_printk(KERN_ERR, target->scsi_host, PFX
"giving up on stale connection\n");
- ch->status = -ECONNRESET;
- return ch->status;
+ ret = -ECONNRESET;
+ goto out;
default:
- return ch->status;
+ goto out;
}
}
+
+out:
+ return ret <= 0 ? ret : -ENODEV;
}
static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
@@ -1309,7 +1313,7 @@ reset_state:
}
static int srp_map_finish_fr(struct srp_map_state *state,
- struct srp_rdma_ch *ch)
+ struct srp_rdma_ch *ch, int sg_nents)
{
struct srp_target_port *target = ch->target;
struct srp_device *dev = target->srp_host->srp_dev;
@@ -1324,10 +1328,10 @@ static int srp_map_finish_fr(struct srp_map_state *state,
WARN_ON_ONCE(!dev->use_fast_reg);
- if (state->sg_nents == 0)
+ if (sg_nents == 0)
return 0;
- if (state->sg_nents == 1 && target->global_mr) {
+ if (sg_nents == 1 && target->global_mr) {
srp_map_desc(state, sg_dma_address(state->sg),
sg_dma_len(state->sg),
target->global_mr->rkey);
@@ -1341,8 +1345,7 @@ static int srp_map_finish_fr(struct srp_map_state *state,
rkey = ib_inc_rkey(desc->mr->rkey);
ib_update_fast_reg_key(desc->mr, rkey);
- n = ib_map_mr_sg(desc->mr, state->sg, state->sg_nents,
- dev->mr_page_size);
+ n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size);
if (unlikely(n < 0))
return n;
@@ -1448,16 +1451,15 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
state->fr.next = req->fr_list;
state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
state->sg = scat;
- state->sg_nents = scsi_sg_count(req->scmnd);
- while (state->sg_nents) {
+ while (count) {
int i, n;
- n = srp_map_finish_fr(state, ch);
+ n = srp_map_finish_fr(state, ch, count);
if (unlikely(n < 0))
return n;
- state->sg_nents -= n;
+ count -= n;
for (i = 0; i < n; i++)
state->sg = sg_next(state->sg);
}
@@ -1517,10 +1519,12 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
if (dev->use_fast_reg) {
state.sg = idb_sg;
- state.sg_nents = 1;
sg_set_buf(idb_sg, req->indirect_desc, idb_len);
idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
- ret = srp_map_finish_fr(&state, ch);
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ idb_sg->dma_length = idb_sg->length; /* hack^2 */
+#endif
+ ret = srp_map_finish_fr(&state, ch, 1);
if (ret < 0)
return ret;
} else if (dev->use_fmr) {
@@ -1655,7 +1659,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
return ret;
req->nmdesc++;
} else {
- idb_rkey = target->global_mr->rkey;
+ idb_rkey = cpu_to_be32(target->global_mr->rkey);
}
indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 87a2a919dc43..f6af531f9f32 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -300,10 +300,7 @@ struct srp_map_state {
dma_addr_t base_dma_addr;
u32 dma_len;
u32 total_len;
- union {
- unsigned int npages;
- int sg_nents;
- };
+ unsigned int npages;
unsigned int nmdesc;
unsigned int ndesc;
};
diff --git a/drivers/input/joystick/db9.c b/drivers/input/joystick/db9.c
index 932d07307454..da326090c2b0 100644
--- a/drivers/input/joystick/db9.c
+++ b/drivers/input/joystick/db9.c
@@ -592,6 +592,7 @@ static void db9_attach(struct parport *pp)
return;
}
+ memset(&db9_parport_cb, 0, sizeof(db9_parport_cb));
db9_parport_cb.flags = PARPORT_FLAG_EXCL;
pd = parport_register_dev_model(pp, "db9", &db9_parport_cb, port_idx);
diff --git a/drivers/input/joystick/gamecon.c b/drivers/input/joystick/gamecon.c
index 5a672dcac0d8..eae14d512353 100644
--- a/drivers/input/joystick/gamecon.c
+++ b/drivers/input/joystick/gamecon.c
@@ -951,6 +951,7 @@ static void gc_attach(struct parport *pp)
pads = gc_cfg[port_idx].args + 1;
n_pads = gc_cfg[port_idx].nargs - 1;
+ memset(&gc_parport_cb, 0, sizeof(gc_parport_cb));
gc_parport_cb.flags = PARPORT_FLAG_EXCL;
pd = parport_register_dev_model(pp, "gamecon", &gc_parport_cb,
diff --git a/drivers/input/joystick/turbografx.c b/drivers/input/joystick/turbografx.c
index 9f5bca26bd2f..77f575dd0901 100644
--- a/drivers/input/joystick/turbografx.c
+++ b/drivers/input/joystick/turbografx.c
@@ -181,6 +181,7 @@ static void tgfx_attach(struct parport *pp)
n_buttons = tgfx_cfg[port_idx].args + 1;
n_devs = tgfx_cfg[port_idx].nargs - 1;
+ memset(&tgfx_parport_cb, 0, sizeof(tgfx_parport_cb));
tgfx_parport_cb.flags = PARPORT_FLAG_EXCL;
pd = parport_register_dev_model(pp, "turbografx", &tgfx_parport_cb,
diff --git a/drivers/input/joystick/walkera0701.c b/drivers/input/joystick/walkera0701.c
index 9c07fe911075..70a893a17467 100644
--- a/drivers/input/joystick/walkera0701.c
+++ b/drivers/input/joystick/walkera0701.c
@@ -218,6 +218,7 @@ static void walkera0701_attach(struct parport *pp)
w->parport = pp;
+ memset(&walkera0701_parport_cb, 0, sizeof(walkera0701_parport_cb));
walkera0701_parport_cb.flags = PARPORT_FLAG_EXCL;
walkera0701_parport_cb.irq_func = walkera0701_irq_handler;
walkera0701_parport_cb.private = w;
diff --git a/drivers/input/misc/arizona-haptics.c b/drivers/input/misc/arizona-haptics.c
index 4bf678541496..d5994a745ffa 100644
--- a/drivers/input/misc/arizona-haptics.c
+++ b/drivers/input/misc/arizona-haptics.c
@@ -97,8 +97,7 @@ static void arizona_haptics_work(struct work_struct *work)
ret = regmap_update_bits(arizona->regmap,
ARIZONA_HAPTICS_CONTROL_1,
- ARIZONA_HAP_CTRL_MASK,
- 1 << ARIZONA_HAP_CTRL_SHIFT);
+ ARIZONA_HAP_CTRL_MASK, 0);
if (ret != 0) {
dev_err(arizona->dev, "Failed to stop haptics: %d\n",
ret);
diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 5e1665bbaa0b..2f589857a039 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -41,6 +41,7 @@
#define DRIVER_NAME "elan_i2c"
#define ELAN_DRIVER_VERSION "1.6.1"
+#define ELAN_VENDOR_ID 0x04f3
#define ETP_MAX_PRESSURE 255
#define ETP_FWIDTH_REDUCE 90
#define ETP_FINGER_WIDTH 15
@@ -914,6 +915,8 @@ static int elan_setup_input_device(struct elan_tp_data *data)
input->name = "Elan Touchpad";
input->id.bustype = BUS_I2C;
+ input->id.vendor = ELAN_VENDOR_ID;
+ input->id.product = data->product_id;
input_set_drvdata(input, data);
error = input_mt_init_slots(input, ETP_MAX_FINGERS,
diff --git a/drivers/input/serio/parkbd.c b/drivers/input/serio/parkbd.c
index 92c31b8f8fb4..1edfac78d4ac 100644
--- a/drivers/input/serio/parkbd.c
+++ b/drivers/input/serio/parkbd.c
@@ -145,6 +145,7 @@ static int parkbd_getport(struct parport *pp)
{
struct pardev_cb parkbd_parport_cb;
+ memset(&parkbd_parport_cb, 0, sizeof(parkbd_parport_cb));
parkbd_parport_cb.irq_func = parkbd_interrupt;
parkbd_parport_cb.flags = PARPORT_FLAG_EXCL;
diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c
index e7f966da6efa..78ca44840d60 100644
--- a/drivers/input/tablet/aiptek.c
+++ b/drivers/input/tablet/aiptek.c
@@ -1819,6 +1819,14 @@ aiptek_probe(struct usb_interface *intf, const struct usb_device_id *id)
input_set_abs_params(inputdev, ABS_TILT_Y, AIPTEK_TILT_MIN, AIPTEK_TILT_MAX, 0, 0);
input_set_abs_params(inputdev, ABS_WHEEL, AIPTEK_WHEEL_MIN, AIPTEK_WHEEL_MAX - 1, 0, 0);
+ /* Verify that the device really has an endpoint */
+ if (intf->altsetting[0].desc.bNumEndpoints < 1) {
+ dev_err(&intf->dev,
+ "interface has %d endpoints, but must have minimum 1\n",
+ intf->altsetting[0].desc.bNumEndpoints);
+ err = -EINVAL;
+ goto fail3;
+ }
endpoint = &intf->altsetting[0].endpoint[0].desc;
/* Go set up our URB, which is called when the tablet receives
@@ -1861,6 +1869,7 @@ aiptek_probe(struct usb_interface *intf, const struct usb_device_id *id)
if (i == ARRAY_SIZE(speeds)) {
dev_info(&intf->dev,
"Aiptek tried all speeds, no sane response\n");
+ err = -EINVAL;
goto fail3;
}
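
The endpoint check added above guards the unconditional &intf->altsetting[0].endpoint[0].desc dereference that follows it. A minimal sketch of the same validation pattern in isolation; the function name is illustrative, not aiptek's:

#include <linux/usb.h>

/* Sketch: reject a USB interface that lacks the endpoint we need. */
static int example_check_endpoints(struct usb_interface *intf)
{
	/* Without at least one endpoint in altsetting[0], reading
	 * endpoint[0] would run past the endpoint array. */
	if (intf->altsetting[0].desc.bNumEndpoints < 1)
		return -EINVAL;

	return 0;
}
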
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index c5622058c22b..2d5794ec338b 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -2487,6 +2487,31 @@ static struct mxt_acpi_platform_data samus_platform_data[] = {
{ }
};
+static unsigned int chromebook_tp_buttons[] = {
+ KEY_RESERVED,
+ KEY_RESERVED,
+ KEY_RESERVED,
+ KEY_RESERVED,
+ KEY_RESERVED,
+ BTN_LEFT
+};
+
+static struct mxt_acpi_platform_data chromebook_platform_data[] = {
+ {
+ /* Touchpad */
+ .hid = "ATML0000",
+ .pdata = {
+ .t19_num_keys = ARRAY_SIZE(chromebook_tp_buttons),
+ .t19_keymap = chromebook_tp_buttons,
+ },
+ },
+ {
+ /* Touchscreen */
+ .hid = "ATML0001",
+ },
+ { }
+};
+
static const struct dmi_system_id mxt_dmi_table[] = {
{
/* 2015 Google Pixel */
@@ -2497,6 +2522,14 @@ static const struct dmi_system_id mxt_dmi_table[] = {
},
.driver_data = samus_platform_data,
},
+ {
+ /* Other Google Chromebooks */
+ .ident = "Chromebook",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+ },
+ .driver_data = chromebook_platform_data,
+ },
{ }
};
@@ -2701,6 +2734,7 @@ static const struct i2c_device_id mxt_id[] = {
{ "qt602240_ts", 0 },
{ "atmel_mxt_ts", 0 },
{ "atmel_mxt_tp", 0 },
+ { "maxtouch", 0 },
{ "mXT224", 0 },
{ }
};
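
The DMI entry added above matches any machine whose system vendor string is "GOOGLE" and hands back chromebook_platform_data. A hedged sketch of how such a table is commonly consumed; the driver's actual lookup code is outside this hunk:

#include <linux/dmi.h>

/* Sketch: resolve platform data from the first matching DMI entry. */
static const struct mxt_acpi_platform_data *example_lookup(void)
{
	const struct dmi_system_id *id = dmi_first_match(mxt_dmi_table);

	return id ? id->driver_data : NULL;
}
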
diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c
index 17cc20ef4923..ac09855fa435 100644
--- a/drivers/input/touchscreen/elants_i2c.c
+++ b/drivers/input/touchscreen/elants_i2c.c
@@ -1316,7 +1316,13 @@ static int __maybe_unused elants_i2c_suspend(struct device *dev)
disable_irq(client->irq);
- if (device_may_wakeup(dev) || ts->keep_power_in_suspend) {
+ if (device_may_wakeup(dev)) {
+ /*
+ * The device will automatically enter an idle mode
+ * with reduced power consumption.
+ */
+ ts->wake_irq_enabled = (enable_irq_wake(client->irq) == 0);
+ } else if (ts->keep_power_in_suspend) {
for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) {
error = elants_i2c_send(client, set_sleep_cmd,
sizeof(set_sleep_cmd));
@@ -1326,10 +1332,6 @@ static int __maybe_unused elants_i2c_suspend(struct device *dev)
dev_err(&client->dev,
"suspend command failed: %d\n", error);
}
-
- if (device_may_wakeup(dev))
- ts->wake_irq_enabled =
- (enable_irq_wake(client->irq) == 0);
} else {
elants_i2c_power_off(ts);
}
@@ -1345,10 +1347,11 @@ static int __maybe_unused elants_i2c_resume(struct device *dev)
int retry_cnt;
int error;
- if (device_may_wakeup(dev) && ts->wake_irq_enabled)
- disable_irq_wake(client->irq);
-
- if (ts->keep_power_in_suspend) {
+ if (device_may_wakeup(dev)) {
+ if (ts->wake_irq_enabled)
+ disable_irq_wake(client->irq);
+ elants_i2c_sw_reset(client);
+ } else if (ts->keep_power_in_suspend) {
for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) {
error = elants_i2c_send(client, set_active_cmd,
sizeof(set_active_cmd));
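
The reordered suspend path above makes wakeup-capable devices take the idle/IRQ-wake branch instead of being sent the sleep command. A condensed sketch of the decision tree, with example_priv and its fields standing in for the driver's real state:

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/pm_wakeup.h>

struct example_priv {
	int irq;
	bool keep_power_in_suspend;
	bool wake_irq_enabled;
};

static int example_suspend(struct device *dev, struct example_priv *priv)
{
	disable_irq(priv->irq);

	if (device_may_wakeup(dev)) {
		/* Arm the IRQ as a wake source and remember whether it
		 * worked, so resume only disarms what was armed. */
		priv->wake_irq_enabled = (enable_irq_wake(priv->irq) == 0);
	} else if (priv->keep_power_in_suspend) {
		/* send the controller its low-power sleep command */
	} else {
		/* cut power entirely */
	}

	return 0;
}
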
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index d21d4edf7236..7caf2fa237f2 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -494,6 +494,22 @@ static void handle_fault_error(struct fault *fault)
}
}
+static bool access_error(struct vm_area_struct *vma, struct fault *fault)
+{
+ unsigned long requested = 0;
+
+ if (fault->flags & PPR_FAULT_EXEC)
+ requested |= VM_EXEC;
+
+ if (fault->flags & PPR_FAULT_READ)
+ requested |= VM_READ;
+
+ if (fault->flags & PPR_FAULT_WRITE)
+ requested |= VM_WRITE;
+
+ return (requested & ~vma->vm_flags) != 0;
+}
+
static void do_fault(struct work_struct *work)
{
struct fault *fault = container_of(work, struct fault, work);
@@ -516,8 +532,8 @@ static void do_fault(struct work_struct *work)
goto out;
}
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) {
- /* handle_mm_fault would BUG_ON() */
+ /* Check if we have the right permissions on the vma */
+ if (access_error(vma, fault)) {
up_read(&mm->mmap_sem);
handle_fault_error(fault);
goto out;
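
The requested & ~vma->vm_flags test above is non-zero exactly when the fault asks for a permission the VMA lacks. A standalone worked instance, using the flag values as defined in <linux/mm.h>, for a write fault (PPR_FAULT_WRITE) against a read-only mapping:

#include <stdbool.h>
#include <stdio.h>

#define VM_READ  0x1UL	/* values as in <linux/mm.h> */
#define VM_WRITE 0x2UL

int main(void)
{
	unsigned long vm_flags  = VM_READ;	/* read-only VMA */
	unsigned long requested = VM_WRITE;	/* from PPR_FAULT_WRITE */

	/* non-zero: the fault wants a permission the VMA lacks */
	bool denied = (requested & ~vm_flags) != 0;

	printf("%d\n", denied);	/* prints 1 */
	return 0;
}
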
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 3a20db4f8604..72d6182666cb 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -21,10 +21,13 @@
#include <linux/device.h>
#include <linux/dma-iommu.h>
+#include <linux/gfp.h>
#include <linux/huge_mm.h>
#include <linux/iommu.h>
#include <linux/iova.h>
#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/vmalloc.h>
int iommu_dma_init(void)
{
@@ -191,6 +194,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
{
struct page **pages;
unsigned int i = 0, array_size = count * sizeof(*pages);
+ unsigned int order = MAX_ORDER;
if (array_size <= PAGE_SIZE)
pages = kzalloc(array_size, GFP_KERNEL);
@@ -204,14 +208,15 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
while (count) {
struct page *page = NULL;
- int j, order = __fls(count);
+ int j;
/*
* Higher-order allocations are a convenience rather
* than a necessity, hence using __GFP_NORETRY until
* falling back to single-page allocations.
*/
- for (order = min(order, MAX_ORDER); order > 0; order--) {
+ for (order = min_t(unsigned int, order, __fls(count));
+ order > 0; order--) {
page = alloc_pages(gfp | __GFP_NORETRY, order);
if (!page)
continue;
@@ -453,7 +458,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
size_t s_offset = iova_offset(iovad, s->offset);
size_t s_length = s->length;
- sg_dma_address(s) = s->offset;
+ sg_dma_address(s) = s_offset;
sg_dma_len(s) = s_length;
s->offset -= s_offset;
s_length = iova_align(iovad, s_length + s_offset);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index f1042daef9ad..ac7387686ddc 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2159,7 +2159,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
sg_res = aligned_nrpages(sg->offset, sg->length);
sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
sg->dma_length = sg->length;
- pteval = (sg_phys(sg) & PAGE_MASK) | prot;
+ pteval = page_to_phys(sg_page(sg)) | prot;
phys_pfn = pteval >> VTD_PAGE_SHIFT;
}
@@ -3704,7 +3704,7 @@ static int intel_nontranslate_map_sg(struct device *hddev,
for_each_sg(sglist, sg, nelems, i) {
BUG_ON(!sg_page(sg));
- sg->dma_address = sg_phys(sg);
+ sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
sg->dma_length = sg->length;
}
return nelems;
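
Both hunks above expand sg_phys() into its page/offset components. A hedged restatement of the relationship the substitution relies on; a sketch, not the driver's code:

#include <linux/scatterlist.h>

/* Sketch: sg_phys(sg) is the page-aligned physical base plus the
 * intra-page offset; the open-coded forms above make each component
 * explicit at the call site. */
static phys_addr_t example_sg_phys(struct scatterlist *sg)
{
	return page_to_phys(sg_page(sg)) + sg->offset;
}
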
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index c69e3f9ec958..50464833d0b8 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -484,6 +484,23 @@ struct page_req_dsc {
};
#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10)
+
+static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req)
+{
+ unsigned long requested = 0;
+
+ if (req->exe_req)
+ requested |= VM_EXEC;
+
+ if (req->rd_req)
+ requested |= VM_READ;
+
+ if (req->wr_req)
+ requested |= VM_WRITE;
+
+ return (requested & ~vma->vm_flags) != 0;
+}
+
static irqreturn_t prq_event_thread(int irq, void *d)
{
struct intel_iommu *iommu = d;
@@ -539,6 +556,9 @@ static irqreturn_t prq_event_thread(int irq, void *d)
if (!vma || address < vma->vm_start)
goto invalid;
+ if (access_error(vma, req))
+ goto invalid;
+
ret = handle_mm_fault(svm->mm, vma, address,
req->wr_req ? FAULT_FLAG_WRITE : 0);
if (ret & VM_FAULT_ERROR)
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 1fae1881648c..c12ba4516df2 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -753,7 +753,7 @@ static inline void set_irq_posting_cap(void)
* should have X86_FEATURE_CX16 support, this has been confirmed
* with Intel hardware guys.
*/
- if ( cpu_has_cx16 )
+ if (boot_cpu_has(X86_FEATURE_CX16))
intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP;
for_each_iommu(iommu, drhd)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index abae363c7b9b..0e3b0092ec92 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1430,7 +1430,7 @@ size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
for_each_sg(sg, s, nents, i) {
- phys_addr_t phys = sg_phys(s);
+ phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset;
/*
* We are mapping on IOMMU page boundaries, so offset within
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 8cf605fa9946..dfb868e2d129 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -295,7 +295,7 @@ static struct iommu_gather_ops ipmmu_gather_ops = {
static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
{
- phys_addr_t ttbr;
+ u64 ttbr;
/*
* Allocate the page table operations.
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index cbe198cb3699..471ee36b9c6e 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -216,6 +216,7 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
u8 *page_addr = (u8 *) (pa & PAGE_MASK);
dma_addr_t start_dma_addr = dma_addr;
unsigned long irq_flags, nr_pages, i;
+ unsigned long *entry;
int rc = 0;
if (dma_addr < s390_domain->domain.geometry.aperture_start ||
@@ -228,8 +229,12 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
for (i = 0; i < nr_pages; i++) {
- dma_update_cpu_trans(s390_domain->dma_table, page_addr,
- dma_addr, flags);
+ entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+ if (!entry) {
+ rc = -ENOMEM;
+ goto undo_cpu_trans;
+ }
+ dma_update_cpu_trans(entry, page_addr, flags);
page_addr += PAGE_SIZE;
dma_addr += PAGE_SIZE;
}
@@ -242,6 +247,20 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
break;
}
spin_unlock(&s390_domain->list_lock);
+
+undo_cpu_trans:
+ if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
+ flags = ZPCI_PTE_INVALID;
+ while (i-- > 0) {
+ page_addr -= PAGE_SIZE;
+ dma_addr -= PAGE_SIZE;
+ entry = dma_walk_cpu_trans(s390_domain->dma_table,
+ dma_addr);
+ if (!entry)
+ break;
+ dma_update_cpu_trans(entry, page_addr, flags);
+ }
+ }
spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
return rc;
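
The undo_cpu_trans path above is a standard unwind-on-failure loop: i entries were updated before the table walk failed, so exactly i entries are stepped back and invalidated. A generic sketch of the pattern; map_one() and unmap_one() are illustrative placeholders, not s390 helpers:

/* Placeholders for the per-entry table update and its inverse. */
static int map_one(unsigned long idx);
static void unmap_one(unsigned long idx);

static int example_map_all(unsigned long n)
{
	unsigned long i;
	int rc = 0;

	for (i = 0; i < n; i++) {
		rc = map_one(i);
		if (rc)
			break;
	}

	/* i entries succeeded before the failure; step back over
	 * exactly those and invalidate them, newest first. */
	if (rc)
		while (i-- > 0)
			unmap_one(i);

	return rc;
}
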
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 4d7294e5d982..11fc2a27fa2e 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -8,6 +8,11 @@ config ARM_GIC
select IRQ_DOMAIN_HIERARCHY
select MULTI_IRQ_HANDLER
+config ARM_GIC_MAX_NR
+ int
+ default 2 if ARCH_REALVIEW
+ default 1
+
config ARM_GIC_V2M
bool
depends on ARM_GIC
@@ -27,6 +32,14 @@ config ARM_GIC_V3_ITS
bool
select PCI_MSI_IRQ_DOMAIN
+config HISILICON_IRQ_MBIGEN
+ bool "Support mbigen interrupt controller"
+ default n
+ depends on ARM_GIC_V3 && ARM_GIC_V3_ITS && GENERIC_MSI_IRQ_DOMAIN
+ help
Enable the mbigen interrupt controller used on
Hisilicon platforms.
+
config ARM_NVIC
bool
select IRQ_DOMAIN
@@ -138,6 +151,12 @@ config TB10X_IRQC
select IRQ_DOMAIN
select GENERIC_IRQ_CHIP
+config TS4800_IRQ
+ tristate "TS-4800 IRQ controller"
+ select IRQ_DOMAIN
+ help
+ Support for the TS-4800 FPGA IRQ controller
+
config VERSATILE_FPGA_IRQ
bool
select IRQ_DOMAIN
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 177f78f6e6d6..d4c2e4ebc308 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -21,9 +21,11 @@ obj-$(CONFIG_ARCH_SUNXI) += irq-sun4i.o
obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi-nmi.o
obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o
obj-$(CONFIG_ARM_GIC) += irq-gic.o irq-gic-common.o
+obj-$(CONFIG_REALVIEW_DT) += irq-gic-realview.o
obj-$(CONFIG_ARM_GIC_V2M) += irq-gic-v2m.o
obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-common.o
obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v3-its-pci-msi.o irq-gic-v3-its-platform-msi.o
+obj-$(CONFIG_HISILICON_IRQ_MBIGEN) += irq-mbigen.o
obj-$(CONFIG_ARM_NVIC) += irq-nvic.o
obj-$(CONFIG_ARM_VIC) += irq-vic.o
obj-$(CONFIG_ATMEL_AIC_IRQ) += irq-atmel-aic-common.o irq-atmel-aic.o
@@ -39,6 +41,7 @@ obj-$(CONFIG_ARCH_NSPIRE) += irq-zevio.o
obj-$(CONFIG_ARCH_VT8500) += irq-vt8500.o
obj-$(CONFIG_ST_IRQCHIP) += irq-st.o
obj-$(CONFIG_TB10X_IRQC) += irq-tb10x.o
+obj-$(CONFIG_TS4800_IRQ) += irq-ts4800.o
obj-$(CONFIG_XTENSA) += irq-xtensa-pic.o
obj-$(CONFIG_XTENSA_MX) += irq-xtensa-mx.o
obj-$(CONFIG_IRQ_CROSSBAR) += irq-crossbar.o
diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c
index f68708281fcf..963065a0d774 100644
--- a/drivers/irqchip/irq-bcm2836.c
+++ b/drivers/irqchip/irq-bcm2836.c
@@ -21,6 +21,9 @@
#include <linux/irqdomain.h>
#include <asm/exception.h>
+#define LOCAL_CONTROL 0x000
+#define LOCAL_PRESCALER 0x008
+
/*
* The low 2 bits identify the CPU that the GPU IRQ goes to, and the
* next 2 bits identify the CPU that the GPU FIQ goes to.
@@ -50,14 +53,16 @@
/* Same status bits as above, but for FIQ. */
#define LOCAL_FIQ_PENDING0 0x070
/*
- * Mailbox0 write-to-set bits. There are 16 mailboxes, 4 per CPU, and
+ * Mailbox write-to-set bits. There are 16 mailboxes, 4 per CPU, and
* these bits are organized by mailbox number and then CPU number. We
* use mailbox 0 for IPIs. The mailbox's interrupt is raised while
* any bit is set.
*/
#define LOCAL_MAILBOX0_SET0 0x080
-/* Mailbox0 write-to-clear bits. */
+#define LOCAL_MAILBOX3_SET0 0x08c
+/* Mailbox write-to-clear bits. */
#define LOCAL_MAILBOX0_CLR0 0x0c0
+#define LOCAL_MAILBOX3_CLR0 0x0cc
#define LOCAL_IRQ_CNTPSIRQ 0
#define LOCAL_IRQ_CNTPNSIRQ 1
@@ -162,7 +167,7 @@ __exception_irq_entry bcm2836_arm_irqchip_handle_irq(struct pt_regs *regs)
u32 stat;
stat = readl_relaxed(intc.base + LOCAL_IRQ_PENDING0 + 4 * cpu);
- if (stat & 0x10) {
+ if (stat & BIT(LOCAL_IRQ_MAILBOX0)) {
#ifdef CONFIG_SMP
void __iomem *mailbox0 = (intc.base +
LOCAL_MAILBOX0_CLR0 + 16 * cpu);
@@ -172,7 +177,7 @@ __exception_irq_entry bcm2836_arm_irqchip_handle_irq(struct pt_regs *regs)
writel(1 << ipi, mailbox0);
handle_IPI(ipi, regs);
#endif
- } else {
+ } else if (stat) {
u32 hwirq = ffs(stat) - 1;
handle_IRQ(irq_linear_revmap(intc.domain, hwirq), regs);
@@ -217,6 +222,24 @@ static struct notifier_block bcm2836_arm_irqchip_cpu_notifier = {
.notifier_call = bcm2836_arm_irqchip_cpu_notify,
.priority = 100,
};
+
+int __init bcm2836_smp_boot_secondary(unsigned int cpu,
+ struct task_struct *idle)
+{
+ unsigned long secondary_startup_phys =
+ (unsigned long)virt_to_phys((void *)secondary_startup);
+
+ dsb();
+ writel(secondary_startup_phys,
+ intc.base + LOCAL_MAILBOX3_SET0 + 16 * cpu);
+
+ return 0;
+}
+
+static const struct smp_operations bcm2836_smp_ops __initconst = {
+ .smp_boot_secondary = bcm2836_smp_boot_secondary,
+};
+
#endif
static const struct irq_domain_ops bcm2836_arm_irqchip_intc_ops = {
@@ -234,9 +257,31 @@ bcm2836_arm_irqchip_smp_init(void)
register_cpu_notifier(&bcm2836_arm_irqchip_cpu_notifier);
set_smp_cross_call(bcm2836_arm_irqchip_send_ipi);
+ smp_set_ops(&bcm2836_smp_ops);
#endif
}
+/*
+ * The LOCAL_IRQ_CNT* timer firings are based on the external
+ * oscillator with some scaling. The firmware sets up CNTFRQ to
+ * report 19.2 MHz, but doesn't set up the scaling registers.
+ */
+static void bcm2835_init_local_timer_frequency(void)
+{
+ /*
+ * Set the timer to source from the 19.2 MHz crystal clock (bit
+ * 8 unset), and only increment by 1 instead of 2 (bit 9
+ * unset).
+ */
+ writel(0, intc.base + LOCAL_CONTROL);
+
+ /*
+ * Set the timer prescaler to 1:1 (timer freq = input freq *
+ * 2**31 / prescaler)
+ */
+ writel(0x80000000, intc.base + LOCAL_PRESCALER);
+}
+
static int __init bcm2836_arm_irqchip_l1_intc_of_init(struct device_node *node,
struct device_node *parent)
{
@@ -246,6 +291,8 @@ static int __init bcm2836_arm_irqchip_l1_intc_of_init(struct device_node *node,
node->full_name);
}
+ bcm2835_init_local_timer_frequency();
+
intc.domain = irq_domain_add_linear(node, LAST_IRQ + 1,
&bcm2836_arm_irqchip_intc_ops,
NULL);
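
The prescaler comment above encodes timer_freq = input_freq * 2^31 / prescaler, so writing 0x80000000 (which is 2^31) yields a 1:1 ratio and the timer ticks at the full 19.2 MHz crystal rate. A small standalone check of that arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t input_freq = 19200000;		/* 19.2 MHz crystal */
	uint64_t prescaler  = 0x80000000;	/* value written above */
	uint64_t timer_freq = input_freq * (1ULL << 31) / prescaler;

	printf("%llu\n", (unsigned long long)timer_freq); /* 19200000 */
	return 0;
}
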
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index 44a077f3a4a2..f174ce0ca361 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -84,12 +84,15 @@ void __init gic_dist_config(void __iomem *base, int gic_irqs,
writel_relaxed(GICD_INT_DEF_PRI_X4, base + GIC_DIST_PRI + i);
/*
- * Disable all interrupts. Leave the PPI and SGIs alone
- * as they are enabled by redistributor registers.
+ * Deactivate and disable all SPIs. Leave the PPI and SGIs
+ * alone as they are in the redistributor registers on GICv3.
*/
- for (i = 32; i < gic_irqs; i += 32)
+ for (i = 32; i < gic_irqs; i += 32) {
writel_relaxed(GICD_INT_EN_CLR_X32,
- base + GIC_DIST_ENABLE_CLEAR + i / 8);
+ base + GIC_DIST_ACTIVE_CLEAR + i / 8);
+ writel_relaxed(GICD_INT_EN_CLR_X32,
+ base + GIC_DIST_ENABLE_CLEAR + i / 8);
+ }
if (sync_access)
sync_access();
@@ -102,7 +105,9 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void))
/*
* Deal with the banked PPI and SGI interrupts - disable all
* PPI interrupts, ensure all SGI interrupts are enabled.
+ * Make sure everything is deactivated.
*/
+ writel_relaxed(GICD_INT_EN_CLR_X32, base + GIC_DIST_ACTIVE_CLEAR);
writel_relaxed(GICD_INT_EN_CLR_PPI, base + GIC_DIST_ENABLE_CLEAR);
writel_relaxed(GICD_INT_EN_SET_SGI, base + GIC_DIST_ENABLE_SET);
diff --git a/drivers/irqchip/irq-gic-realview.c b/drivers/irqchip/irq-gic-realview.c
new file mode 100644
index 000000000000..aa46eb280a7f
--- /dev/null
+++ b/drivers/irqchip/irq-gic-realview.c
@@ -0,0 +1,43 @@
+/*
+ * Special GIC quirks for the ARM RealView
+ * Copyright (C) 2015 Linus Walleij
+ */
+#include <linux/of.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
+#include <linux/bitops.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic.h>
+
+#define REALVIEW_SYS_LOCK_OFFSET 0x20
+#define REALVIEW_PB11MP_SYS_PLD_CTRL1 0x74
+#define VERSATILE_LOCK_VAL 0xA05F
+#define PLD_INTMODE_MASK (BIT(22) | BIT(23) | BIT(24))
+#define PLD_INTMODE_LEGACY 0x0
+#define PLD_INTMODE_NEW_DCC BIT(22)
+#define PLD_INTMODE_NEW_NO_DCC BIT(23)
+#define PLD_INTMODE_FIQ_ENABLE BIT(24)
+
+static int __init
+realview_gic_of_init(struct device_node *node, struct device_node *parent)
+{
+ static struct regmap *map;
+
+ /* The PB11MPCore GIC needs to be configured in the syscon */
+ map = syscon_regmap_lookup_by_compatible("arm,realview-pb11mp-syscon");
+ if (!IS_ERR(map)) {
+ /* new irq mode with no DCC */
+ regmap_write(map, REALVIEW_SYS_LOCK_OFFSET,
+ VERSATILE_LOCK_VAL);
+ regmap_update_bits(map, REALVIEW_PB11MP_SYS_PLD_CTRL1,
+ PLD_INTMODE_MASK,
+ PLD_INTMODE_NEW_NO_DCC);
+ regmap_write(map, REALVIEW_SYS_LOCK_OFFSET, 0x0000);
+ pr_info("TC11MP GIC: set up interrupt controller to NEW mode, no DCC\n");
+ } else {
+ pr_err("TC11MP GIC setup: could not find syscon\n");
+ return -ENXIO;
+ }
+ return gic_of_init(node, parent);
+}
+IRQCHIP_DECLARE(armtc11mp_gic, "arm,tc11mp-gic", realview_gic_of_init);
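
For reference, regmap_update_bits() takes (map, reg, mask, val): only the bits set in mask are modified, to the corresponding bits of val. The call above therefore passes the PLD mode-field mask as the third argument and the desired NEW_NO_DCC value as the fourth. A sketch of the same update in isolation, reusing the definitions from this file:

/* Clear the three PLD interrupt-mode bits, then set only the
 * "new mode, no DCC" bit within that field. */
regmap_update_bits(map, REALVIEW_PB11MP_SYS_PLD_CTRL1,
		   PLD_INTMODE_MASK,		/* bits owned by this field */
		   PLD_INTMODE_NEW_NO_DCC);	/* value for those bits */
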
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index 87f8d104acab..c779f83e511d 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -15,9 +15,11 @@
#define pr_fmt(fmt) "GICv2m: " fmt
+#include <linux/acpi.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/kernel.h>
+#include <linux/msi.h>
#include <linux/of_address.h>
#include <linux/of_pci.h>
#include <linux/slab.h>
@@ -55,7 +57,7 @@ static DEFINE_SPINLOCK(v2m_lock);
struct v2m_data {
struct list_head entry;
- struct device_node *node;
+ struct fwnode_handle *fwnode;
struct resource res; /* GICv2m resource */
void __iomem *base; /* GICv2m virt address */
u32 spi_start; /* The SPI number that MSIs start */
@@ -138,6 +140,11 @@ static int gicv2m_irq_gic_domain_alloc(struct irq_domain *domain,
fwspec.param[0] = 0;
fwspec.param[1] = hwirq - 32;
fwspec.param[2] = IRQ_TYPE_EDGE_RISING;
+ } else if (is_fwnode_irqchip(domain->parent->fwnode)) {
+ fwspec.fwnode = domain->parent->fwnode;
+ fwspec.param_count = 2;
+ fwspec.param[0] = hwirq;
+ fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
} else {
return -EINVAL;
}
@@ -254,7 +261,9 @@ static void gicv2m_teardown(void)
list_del(&v2m->entry);
kfree(v2m->bm);
iounmap(v2m->base);
- of_node_put(v2m->node);
+ of_node_put(to_of_node(v2m->fwnode));
+ if (is_fwnode_irqchip(v2m->fwnode))
+ irq_domain_free_fwnode(v2m->fwnode);
kfree(v2m);
}
}
@@ -268,7 +277,7 @@ static int gicv2m_allocate_domains(struct irq_domain *parent)
if (!v2m)
return 0;
- inner_domain = irq_domain_create_tree(of_node_to_fwnode(v2m->node),
+ inner_domain = irq_domain_create_tree(v2m->fwnode,
&gicv2m_domain_ops, v2m);
if (!inner_domain) {
pr_err("Failed to create GICv2m domain\n");
@@ -277,10 +286,10 @@ static int gicv2m_allocate_domains(struct irq_domain *parent)
inner_domain->bus_token = DOMAIN_BUS_NEXUS;
inner_domain->parent = parent;
- pci_domain = pci_msi_create_irq_domain(of_node_to_fwnode(v2m->node),
+ pci_domain = pci_msi_create_irq_domain(v2m->fwnode,
&gicv2m_msi_domain_info,
inner_domain);
- plat_domain = platform_msi_create_irq_domain(of_node_to_fwnode(v2m->node),
+ plat_domain = platform_msi_create_irq_domain(v2m->fwnode,
&gicv2m_pmsi_domain_info,
inner_domain);
if (!pci_domain || !plat_domain) {
@@ -296,8 +305,9 @@ static int gicv2m_allocate_domains(struct irq_domain *parent)
return 0;
}
-static int __init gicv2m_init_one(struct device_node *node,
- struct irq_domain *parent)
+static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
+ u32 spi_start, u32 nr_spis,
+ struct resource *res)
{
int ret;
struct v2m_data *v2m;
@@ -309,13 +319,9 @@ static int __init gicv2m_init_one(struct device_node *node,
}
INIT_LIST_HEAD(&v2m->entry);
- v2m->node = node;
+ v2m->fwnode = fwnode;
- ret = of_address_to_resource(node, 0, &v2m->res);
- if (ret) {
- pr_err("Failed to allocate v2m resource.\n");
- goto err_free_v2m;
- }
+ memcpy(&v2m->res, res, sizeof(struct resource));
v2m->base = ioremap(v2m->res.start, resource_size(&v2m->res));
if (!v2m->base) {
@@ -324,10 +330,9 @@ static int __init gicv2m_init_one(struct device_node *node,
goto err_free_v2m;
}
- if (!of_property_read_u32(node, "arm,msi-base-spi", &v2m->spi_start) &&
- !of_property_read_u32(node, "arm,msi-num-spis", &v2m->nr_spis)) {
- pr_info("Overriding V2M MSI_TYPER (base:%u, num:%u)\n",
- v2m->spi_start, v2m->nr_spis);
+ if (spi_start && nr_spis) {
+ v2m->spi_start = spi_start;
+ v2m->nr_spis = nr_spis;
} else {
u32 typer = readl_relaxed(v2m->base + V2M_MSI_TYPER);
@@ -359,10 +364,9 @@ static int __init gicv2m_init_one(struct device_node *node,
}
list_add_tail(&v2m->entry, &v2m_nodes);
- pr_info("Node %s: range[%#lx:%#lx], SPI[%d:%d]\n", node->name,
- (unsigned long)v2m->res.start, (unsigned long)v2m->res.end,
- v2m->spi_start, (v2m->spi_start + v2m->nr_spis));
+ pr_info("range%pR, SPI[%d:%d]\n", res,
+ v2m->spi_start, (v2m->spi_start + v2m->nr_spis - 1));
return 0;
err_iounmap:
@@ -377,19 +381,36 @@ static struct of_device_id gicv2m_device_id[] = {
{},
};
-int __init gicv2m_of_init(struct device_node *node, struct irq_domain *parent)
+static int __init gicv2m_of_init(struct fwnode_handle *parent_handle,
+ struct irq_domain *parent)
{
int ret = 0;
+ struct device_node *node = to_of_node(parent_handle);
struct device_node *child;
for (child = of_find_matching_node(node, gicv2m_device_id); child;
child = of_find_matching_node(child, gicv2m_device_id)) {
+ u32 spi_start = 0, nr_spis = 0;
+ struct resource res;
+
if (!of_find_property(child, "msi-controller", NULL))
continue;
- ret = gicv2m_init_one(child, parent);
+ ret = of_address_to_resource(child, 0, &res);
+ if (ret) {
+ pr_err("Failed to allocate v2m resource.\n");
+ break;
+ }
+
+ if (!of_property_read_u32(child, "arm,msi-base-spi",
+ &spi_start) &&
+ !of_property_read_u32(child, "arm,msi-num-spis", &nr_spis))
+ pr_info("DT overriding V2M MSI_TYPER (base:%u, num:%u)\n",
+ spi_start, nr_spis);
+
+ ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis, &res);
if (ret) {
- of_node_put(node);
+ of_node_put(child);
break;
}
}
@@ -400,3 +421,101 @@ int __init gicv2m_of_init(struct device_node *node, struct irq_domain *parent)
gicv2m_teardown();
return ret;
}
+
+#ifdef CONFIG_ACPI
+static int acpi_num_msi;
+
+static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev)
+{
+ struct v2m_data *data;
+
+ if (WARN_ON(acpi_num_msi <= 0))
+ return NULL;
+
+ /* We only return the fwnode of the first MSI frame. */
+ data = list_first_entry_or_null(&v2m_nodes, struct v2m_data, entry);
+ if (!data)
+ return NULL;
+
+ return data->fwnode;
+}
+
+static int __init
+acpi_parse_madt_msi(struct acpi_subtable_header *header,
+ const unsigned long end)
+{
+ int ret;
+ struct resource res;
+ u32 spi_start = 0, nr_spis = 0;
+ struct acpi_madt_generic_msi_frame *m;
+ struct fwnode_handle *fwnode;
+
+ m = (struct acpi_madt_generic_msi_frame *)header;
+ if (BAD_MADT_ENTRY(m, end))
+ return -EINVAL;
+
+ res.start = m->base_address;
+ res.end = m->base_address + SZ_4K - 1;
+ res.flags = IORESOURCE_MEM;
+
+ if (m->flags & ACPI_MADT_OVERRIDE_SPI_VALUES) {
+ spi_start = m->spi_base;
+ nr_spis = m->spi_count;
+
+ pr_info("ACPI overriding V2M MSI_TYPER (base:%u, num:%u)\n",
+ spi_start, nr_spis);
+ }
+
+ fwnode = irq_domain_alloc_fwnode((void *)m->base_address);
+ if (!fwnode) {
+ pr_err("Unable to allocate GICv2m domain token\n");
+ return -EINVAL;
+ }
+
+ ret = gicv2m_init_one(fwnode, spi_start, nr_spis, &res);
+ if (ret)
+ irq_domain_free_fwnode(fwnode);
+
+ return ret;
+}
+
+static int __init gicv2m_acpi_init(struct irq_domain *parent)
+{
+ int ret;
+
+ if (acpi_num_msi > 0)
+ return 0;
+
+ acpi_num_msi = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_MSI_FRAME,
+ acpi_parse_madt_msi, 0);
+
+ if (acpi_num_msi <= 0)
+ goto err_out;
+
+ ret = gicv2m_allocate_domains(parent);
+ if (ret)
+ goto err_out;
+
+ pci_msi_register_fwnode_provider(&gicv2m_get_fwnode);
+
+ return 0;
+
+err_out:
+ gicv2m_teardown();
+ return -EINVAL;
+}
+#else /* CONFIG_ACPI */
+static int __init gicv2m_acpi_init(struct irq_domain *parent)
+{
+ return -EINVAL;
+}
+#endif /* CONFIG_ACPI */
+
+int __init gicv2m_init(struct fwnode_handle *parent_handle,
+ struct irq_domain *parent)
+{
+ if (is_of_node(parent_handle))
+ return gicv2m_of_init(parent_handle, parent);
+
+ return gicv2m_acpi_init(parent);
+}
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 515c823c1c95..911758c056c1 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -69,13 +69,16 @@ union gic_base {
};
struct gic_chip_data {
+ struct irq_chip chip;
union gic_base dist_base;
union gic_base cpu_base;
#ifdef CONFIG_CPU_PM
u32 saved_spi_enable[DIV_ROUND_UP(1020, 32)];
+ u32 saved_spi_active[DIV_ROUND_UP(1020, 32)];
u32 saved_spi_conf[DIV_ROUND_UP(1020, 16)];
u32 saved_spi_target[DIV_ROUND_UP(1020, 4)];
u32 __percpu *saved_ppi_enable;
+ u32 __percpu *saved_ppi_active;
u32 __percpu *saved_ppi_conf;
#endif
struct irq_domain *domain;
@@ -97,11 +100,7 @@ static u8 gic_cpu_map[NR_GIC_CPU_IF] __read_mostly;
static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;
-#ifndef MAX_GIC_NR
-#define MAX_GIC_NR 1
-#endif
-
-static struct gic_chip_data gic_data[MAX_GIC_NR] __read_mostly;
+static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly;
#ifdef CONFIG_GIC_NON_BANKED
static void __iomem *gic_get_percpu_base(union gic_base *base)
@@ -334,7 +333,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK);
irqnr = irqstat & GICC_IAR_INT_ID_MASK;
- if (likely(irqnr > 15 && irqnr < 1021)) {
+ if (likely(irqnr > 15 && irqnr < 1020)) {
if (static_key_true(&supports_deactivate))
writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
handle_domain_irq(gic->domain, irqnr, regs);
@@ -381,7 +380,6 @@ static void gic_handle_cascade_irq(struct irq_desc *desc)
}
static struct irq_chip gic_chip = {
- .name = "GIC",
.irq_mask = gic_mask_irq,
.irq_unmask = gic_unmask_irq,
.irq_eoi = gic_eoi_irq,
@@ -415,8 +413,7 @@ static struct irq_chip gic_eoimode1_chip = {
void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
{
- if (gic_nr >= MAX_GIC_NR)
- BUG();
+ BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
irq_set_chained_handler_and_data(irq, gic_handle_cascade_irq,
&gic_data[gic_nr]);
}
@@ -522,7 +519,7 @@ int gic_cpu_if_down(unsigned int gic_nr)
void __iomem *cpu_base;
u32 val = 0;
- if (gic_nr >= MAX_GIC_NR)
+ if (gic_nr >= CONFIG_ARM_GIC_MAX_NR)
return -EINVAL;
cpu_base = gic_data_cpu_base(&gic_data[gic_nr]);
@@ -546,8 +543,7 @@ static void gic_dist_save(unsigned int gic_nr)
void __iomem *dist_base;
int i;
- if (gic_nr >= MAX_GIC_NR)
- BUG();
+ BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
gic_irqs = gic_data[gic_nr].gic_irqs;
dist_base = gic_data_dist_base(&gic_data[gic_nr]);
@@ -566,6 +562,10 @@ static void gic_dist_save(unsigned int gic_nr)
for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++)
gic_data[gic_nr].saved_spi_enable[i] =
readl_relaxed(dist_base + GIC_DIST_ENABLE_SET + i * 4);
+
+ for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++)
+ gic_data[gic_nr].saved_spi_active[i] =
+ readl_relaxed(dist_base + GIC_DIST_ACTIVE_SET + i * 4);
}
/*
@@ -581,8 +581,7 @@ static void gic_dist_restore(unsigned int gic_nr)
unsigned int i;
void __iomem *dist_base;
- if (gic_nr >= MAX_GIC_NR)
- BUG();
+ BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
gic_irqs = gic_data[gic_nr].gic_irqs;
dist_base = gic_data_dist_base(&gic_data[gic_nr]);
@@ -604,9 +603,19 @@ static void gic_dist_restore(unsigned int gic_nr)
writel_relaxed(gic_data[gic_nr].saved_spi_target[i],
dist_base + GIC_DIST_TARGET + i * 4);
- for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++)
+ for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++) {
+ writel_relaxed(GICD_INT_EN_CLR_X32,
+ dist_base + GIC_DIST_ENABLE_CLEAR + i * 4);
writel_relaxed(gic_data[gic_nr].saved_spi_enable[i],
dist_base + GIC_DIST_ENABLE_SET + i * 4);
+ }
+
+ for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++) {
+ writel_relaxed(GICD_INT_EN_CLR_X32,
+ dist_base + GIC_DIST_ACTIVE_CLEAR + i * 4);
+ writel_relaxed(gic_data[gic_nr].saved_spi_active[i],
+ dist_base + GIC_DIST_ACTIVE_SET + i * 4);
+ }
writel_relaxed(GICD_ENABLE, dist_base + GIC_DIST_CTRL);
}
@@ -618,8 +627,7 @@ static void gic_cpu_save(unsigned int gic_nr)
void __iomem *dist_base;
void __iomem *cpu_base;
- if (gic_nr >= MAX_GIC_NR)
- BUG();
+ BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
dist_base = gic_data_dist_base(&gic_data[gic_nr]);
cpu_base = gic_data_cpu_base(&gic_data[gic_nr]);
@@ -631,6 +639,10 @@ static void gic_cpu_save(unsigned int gic_nr)
for (i = 0; i < DIV_ROUND_UP(32, 32); i++)
ptr[i] = readl_relaxed(dist_base + GIC_DIST_ENABLE_SET + i * 4);
+ ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_active);
+ for (i = 0; i < DIV_ROUND_UP(32, 32); i++)
+ ptr[i] = readl_relaxed(dist_base + GIC_DIST_ACTIVE_SET + i * 4);
+
ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_conf);
for (i = 0; i < DIV_ROUND_UP(32, 16); i++)
ptr[i] = readl_relaxed(dist_base + GIC_DIST_CONFIG + i * 4);
@@ -644,8 +656,7 @@ static void gic_cpu_restore(unsigned int gic_nr)
void __iomem *dist_base;
void __iomem *cpu_base;
- if (gic_nr >= MAX_GIC_NR)
- BUG();
+ BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
dist_base = gic_data_dist_base(&gic_data[gic_nr]);
cpu_base = gic_data_cpu_base(&gic_data[gic_nr]);
@@ -654,8 +665,18 @@ static void gic_cpu_restore(unsigned int gic_nr)
return;
ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_enable);
- for (i = 0; i < DIV_ROUND_UP(32, 32); i++)
+ for (i = 0; i < DIV_ROUND_UP(32, 32); i++) {
+ writel_relaxed(GICD_INT_EN_CLR_X32,
+ dist_base + GIC_DIST_ENABLE_CLEAR + i * 4);
writel_relaxed(ptr[i], dist_base + GIC_DIST_ENABLE_SET + i * 4);
+ }
+
+ ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_active);
+ for (i = 0; i < DIV_ROUND_UP(32, 32); i++) {
+ writel_relaxed(GICD_INT_EN_CLR_X32,
+ dist_base + GIC_DIST_ACTIVE_CLEAR + i * 4);
+ writel_relaxed(ptr[i], dist_base + GIC_DIST_ACTIVE_SET + i * 4);
+ }
ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_conf);
for (i = 0; i < DIV_ROUND_UP(32, 16); i++)
@@ -673,7 +694,7 @@ static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v)
{
int i;
- for (i = 0; i < MAX_GIC_NR; i++) {
+ for (i = 0; i < CONFIG_ARM_GIC_MAX_NR; i++) {
#ifdef CONFIG_GIC_NON_BANKED
/* Skip over unused GICs */
if (!gic_data[i].get_base)
@@ -710,6 +731,10 @@ static void __init gic_pm_init(struct gic_chip_data *gic)
sizeof(u32));
BUG_ON(!gic->saved_ppi_enable);
+ gic->saved_ppi_active = __alloc_percpu(DIV_ROUND_UP(32, 32) * 4,
+ sizeof(u32));
+ BUG_ON(!gic->saved_ppi_active);
+
gic->saved_ppi_conf = __alloc_percpu(DIV_ROUND_UP(32, 16) * 4,
sizeof(u32));
BUG_ON(!gic->saved_ppi_conf);
@@ -801,8 +826,7 @@ void gic_migrate_target(unsigned int new_cpu_id)
int i, ror_val, cpu = smp_processor_id();
u32 val, cur_target_mask, active_mask;
- if (gic_nr >= MAX_GIC_NR)
- BUG();
+ BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
dist_base = gic_data_dist_base(&gic_data[gic_nr]);
if (!dist_base)
@@ -891,20 +915,15 @@ void __init gic_init_physaddr(struct device_node *node)
static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hw)
{
- struct irq_chip *chip = &gic_chip;
-
- if (static_key_true(&supports_deactivate)) {
- if (d->host_data == (void *)&gic_data[0])
- chip = &gic_eoimode1_chip;
- }
+ struct gic_chip_data *gic = d->host_data;
if (hw < 32) {
irq_set_percpu_devid(irq);
- irq_domain_set_info(d, irq, hw, chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data,
handle_percpu_devid_irq, NULL, NULL);
irq_set_status_flags(irq, IRQ_NOAUTOEN);
} else {
- irq_domain_set_info(d, irq, hw, chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data,
handle_fasteoi_irq, NULL, NULL);
irq_set_probe(irq);
}
@@ -938,7 +957,7 @@ static int gic_irq_domain_translate(struct irq_domain *d,
return 0;
}
- if (fwspec->fwnode->type == FWNODE_IRQCHIP) {
+ if (is_fwnode_irqchip(fwspec->fwnode)) {
if (fwspec->param_count != 2)
return -EINVAL;
@@ -1006,11 +1025,20 @@ static void __init __gic_init_bases(unsigned int gic_nr, int irq_start,
struct gic_chip_data *gic;
int gic_irqs, irq_base, i;
- BUG_ON(gic_nr >= MAX_GIC_NR);
+ BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
gic_check_cpu_features();
gic = &gic_data[gic_nr];
+
+ /* Initialize irq_chip */
+ if (static_key_true(&supports_deactivate) && gic_nr == 0) {
+ gic->chip = gic_eoimode1_chip;
+ } else {
+ gic->chip = gic_chip;
+ gic->chip.name = kasprintf(GFP_KERNEL, "GIC-%d", gic_nr);
+ }
+
#ifdef CONFIG_GIC_NON_BANKED
if (percpu_offset) { /* Franken-GIC without banked registers... */
unsigned int cpu;
@@ -1162,7 +1190,7 @@ static bool gic_check_eoimode(struct device_node *node, void __iomem **base)
return true;
}
-static int __init
+int __init
gic_of_init(struct device_node *node, struct device_node *parent)
{
void __iomem *cpu_base;
@@ -1200,7 +1228,7 @@ gic_of_init(struct device_node *node, struct device_node *parent)
}
if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
- gicv2m_of_init(node, gic_data[gic_cnt].domain);
+ gicv2m_init(&node->fwnode, gic_data[gic_cnt].domain);
gic_cnt++;
return 0;
@@ -1325,6 +1353,10 @@ static int __init gic_v2_acpi_init(struct acpi_subtable_header *header,
__gic_init_bases(0, -1, dist_base, cpu_base, 0, domain_handle);
acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle);
+
+ if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
+ gicv2m_init(NULL, gic_data[0].domain);
+
return 0;
}
IRQCHIP_ACPI_DECLARE(gic_v2, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
new file mode 100644
index 000000000000..4dd3eb8a40b3
--- /dev/null
+++ b/drivers/irqchip/irq-mbigen.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright (C) 2015 Hisilicon Limited, All Rights Reserved.
+ * Author: Jun Ma <majun258@huawei.com>
+ * Author: Yun Wu <wuyun.wu@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqchip.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+/* Number of interrupts supported per mbigen node */
+#define IRQS_PER_MBIGEN_NODE 128
+
+/* 64 irqs (pins 0-63) are reserved for each mbigen chip */
+#define RESERVED_IRQ_PER_MBIGEN_CHIP 64
+
+/* The maximum IRQ pin number of an mbigen chip (starting from 0) */
+#define MAXIMUM_IRQ_PIN_NUM 1407
+
+/*
+ * In the mbigen vector register:
+ * bit[21:12]: event id value
+ * bit[11:0]:  device id
+ */
+#define IRQ_EVENT_ID_SHIFT 12
+#define IRQ_EVENT_ID_MASK 0x3ff
+
+/* register range of each mbigen node */
+#define MBIGEN_NODE_OFFSET 0x1000
+
+/* offset of vector register in mbigen node */
+#define REG_MBIGEN_VEC_OFFSET 0x200
+
+/*
+ * Offset of the clear register in an mbigen node.
+ * This register is used to clear the interrupt
+ * status.
+ */
+#define REG_MBIGEN_CLEAR_OFFSET 0xa000
+
+/*
+ * Offset of the interrupt type register.
+ * This register is used to configure the interrupt
+ * trigger type.
+ */
+#define REG_MBIGEN_TYPE_OFFSET 0x0
+
+/**
+ * struct mbigen_device - holds the information of the mbigen device.
+ *
+ * @pdev: pointer to the platform device structure of mbigen chip.
+ * @base: mapped address of this mbigen chip.
+ */
+struct mbigen_device {
+ struct platform_device *pdev;
+ void __iomem *base;
+};
+
+static inline unsigned int get_mbigen_vec_reg(irq_hw_number_t hwirq)
+{
+ unsigned int nid, pin;
+
+ hwirq -= RESERVED_IRQ_PER_MBIGEN_CHIP;
+ nid = hwirq / IRQS_PER_MBIGEN_NODE + 1;
+ pin = hwirq % IRQS_PER_MBIGEN_NODE;
+
+ return pin * 4 + nid * MBIGEN_NODE_OFFSET
+ + REG_MBIGEN_VEC_OFFSET;
+}
+
+static inline void get_mbigen_type_reg(irq_hw_number_t hwirq,
+ u32 *mask, u32 *addr)
+{
+ unsigned int nid, irq_ofst, ofst;
+
+ hwirq -= RESERVED_IRQ_PER_MBIGEN_CHIP;
+ nid = hwirq / IRQS_PER_MBIGEN_NODE + 1;
+ irq_ofst = hwirq % IRQS_PER_MBIGEN_NODE;
+
+ *mask = 1 << (irq_ofst % 32);
+ ofst = irq_ofst / 32 * 4;
+
+ *addr = ofst + nid * MBIGEN_NODE_OFFSET
+ + REG_MBIGEN_TYPE_OFFSET;
+}
+
+static inline void get_mbigen_clear_reg(irq_hw_number_t hwirq,
+ u32 *mask, u32 *addr)
+{
+ unsigned int ofst;
+
+ hwirq -= RESERVED_IRQ_PER_MBIGEN_CHIP;
+ ofst = hwirq / 32 * 4;
+
+ *mask = 1 << (hwirq % 32);
+ *addr = ofst + REG_MBIGEN_CLEAR_OFFSET;
+}
+
+static void mbigen_eoi_irq(struct irq_data *data)
+{
+ void __iomem *base = data->chip_data;
+ u32 mask, addr;
+
+ get_mbigen_clear_reg(data->hwirq, &mask, &addr);
+
+ writel_relaxed(mask, base + addr);
+
+ irq_chip_eoi_parent(data);
+}
+
+static int mbigen_set_type(struct irq_data *data, unsigned int type)
+{
+ void __iomem *base = data->chip_data;
+ u32 mask, addr, val;
+
+ if (type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING)
+ return -EINVAL;
+
+ get_mbigen_type_reg(data->hwirq, &mask, &addr);
+
+ val = readl_relaxed(base + addr);
+
+ if (type == IRQ_TYPE_LEVEL_HIGH)
+ val |= mask;
+ else
+ val &= ~mask;
+
+ writel_relaxed(val, base + addr);
+
+ return 0;
+}
+
+static struct irq_chip mbigen_irq_chip = {
+ .name = "mbigen-v2",
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_eoi = mbigen_eoi_irq,
+ .irq_set_type = mbigen_set_type,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+};
+
+static void mbigen_write_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+ struct irq_data *d = irq_get_irq_data(desc->irq);
+ void __iomem *base = d->chip_data;
+ u32 val;
+
+ base += get_mbigen_vec_reg(d->hwirq);
+ val = readl_relaxed(base);
+
+ val &= ~(IRQ_EVENT_ID_MASK << IRQ_EVENT_ID_SHIFT);
+ val |= (msg->data << IRQ_EVENT_ID_SHIFT);
+
+ /* The doorbell address is encoded in the mbigen register by
+ * default, so we don't need to program it here.
+ */
+ writel_relaxed(val, base);
+}
+
+static int mbigen_domain_translate(struct irq_domain *d,
+ struct irq_fwspec *fwspec,
+ unsigned long *hwirq,
+ unsigned int *type)
+{
+ if (is_of_node(fwspec->fwnode)) {
+ if (fwspec->param_count != 2)
+ return -EINVAL;
+
+ if ((fwspec->param[0] > MAXIMUM_IRQ_PIN_NUM) ||
+ (fwspec->param[0] < RESERVED_IRQ_PER_MBIGEN_CHIP))
+ return -EINVAL;
+ else
+ *hwirq = fwspec->param[0];
+
+ /* Only rising-edge and high-level trigger types are valid */
+ if ((fwspec->param[1] == IRQ_TYPE_EDGE_RISING) ||
+ (fwspec->param[1] == IRQ_TYPE_LEVEL_HIGH))
+ *type = fwspec->param[1];
+ else
+ return -EINVAL;
+
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static int mbigen_irq_domain_alloc(struct irq_domain *domain,
+ unsigned int virq,
+ unsigned int nr_irqs,
+ void *args)
+{
+ struct irq_fwspec *fwspec = args;
+ irq_hw_number_t hwirq;
+ unsigned int type;
+ struct mbigen_device *mgn_chip;
+ int i, err;
+
+ err = mbigen_domain_translate(domain, fwspec, &hwirq, &type);
+ if (err)
+ return err;
+
+ err = platform_msi_domain_alloc(domain, virq, nr_irqs);
+ if (err)
+ return err;
+
+ mgn_chip = platform_msi_get_host_data(domain);
+
+ for (i = 0; i < nr_irqs; i++)
+ irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+ &mbigen_irq_chip, mgn_chip->base);
+
+ return 0;
+}
+
+static struct irq_domain_ops mbigen_domain_ops = {
+ .translate = mbigen_domain_translate,
+ .alloc = mbigen_irq_domain_alloc,
+ .free = irq_domain_free_irqs_common,
+};
+
+static int mbigen_device_probe(struct platform_device *pdev)
+{
+ struct mbigen_device *mgn_chip;
+ struct resource *res;
+ struct irq_domain *domain;
+ u32 num_pins;
+
+ mgn_chip = devm_kzalloc(&pdev->dev, sizeof(*mgn_chip), GFP_KERNEL);
+ if (!mgn_chip)
+ return -ENOMEM;
+
+ mgn_chip->pdev = pdev;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ mgn_chip->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(mgn_chip->base))
+ return PTR_ERR(mgn_chip->base);
+
+ if (of_property_read_u32(pdev->dev.of_node, "num-pins", &num_pins) < 0) {
+ dev_err(&pdev->dev, "No num-pins property\n");
+ return -EINVAL;
+ }
+
+ domain = platform_msi_create_device_domain(&pdev->dev, num_pins,
+ mbigen_write_msg,
+ &mbigen_domain_ops,
+ mgn_chip);
+
+ if (!domain)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, mgn_chip);
+
+ dev_info(&pdev->dev, "Allocated %d MSIs\n", num_pins);
+
+ return 0;
+}
+
+static const struct of_device_id mbigen_of_match[] = {
+ { .compatible = "hisilicon,mbigen-v2" },
+ { /* END */ }
+};
+MODULE_DEVICE_TABLE(of, mbigen_of_match);
+
+static struct platform_driver mbigen_platform_driver = {
+ .driver = {
+ .name = "Hisilicon MBIGEN-V2",
+ .owner = THIS_MODULE,
+ .of_match_table = mbigen_of_match,
+ },
+ .probe = mbigen_device_probe,
+};
+
+module_platform_driver(mbigen_platform_driver);
+
+MODULE_AUTHOR("Jun Ma <majun258@huawei.com>");
+MODULE_AUTHOR("Yun Wu <wuyun.wu@huawei.com>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Hisilicon MBI Generator driver");
diff --git a/drivers/irqchip/irq-omap-intc.c b/drivers/irqchip/irq-omap-intc.c
index 8587d0f8d8c0..9d1bcfc33e4c 100644
--- a/drivers/irqchip/irq-omap-intc.c
+++ b/drivers/irqchip/irq-omap-intc.c
@@ -47,6 +47,7 @@
#define INTC_ILR0 0x0100
#define ACTIVEIRQ_MASK 0x7f /* omap2/3 active interrupt bits */
+#define SPURIOUSIRQ_MASK (0x1ffffff << 7)
#define INTCPS_NR_ILR_REGS 128
#define INTCPS_NR_MIR_REGS 4
@@ -207,7 +208,6 @@ static int __init omap_alloc_gc_of(struct irq_domain *d, void __iomem *base)
ct = gc->chip_types;
ct->type = IRQ_TYPE_LEVEL_MASK;
- ct->handler = handle_level_irq;
ct->chip.irq_ack = omap_mask_ack_irq;
ct->chip.irq_mask = irq_gc_mask_disable_reg;
@@ -330,11 +330,35 @@ static int __init omap_init_irq(u32 base, struct device_node *node)
static asmlinkage void __exception_irq_entry
omap_intc_handle_irq(struct pt_regs *regs)
{
+ extern unsigned long irq_err_count;
u32 irqnr;
irqnr = intc_readl(INTC_SIR);
+
+ /*
+ * A spurious IRQ can result if the interrupt that triggered the
+ * sorting is no longer active during the sorting (10 INTC
+ * functional clock cycles after interrupt assertion), or if a
+ * change in the interrupt mask affected the result during the
+ * sorting window. No special handling is required except ignoring
+ * the SIR register value just read and retrying.
+ * See section 6.2.5 of the AM335x TRM, Literature Number SPRUH73K.
+ *
+ * Many times, a spurious interrupt situation has been fixed by
+ * adding a flush for the posted write acking the IRQ in the
+ * device driver. Typically, this is going to be the device
+ * driver whose interrupt was handled just before the spurious
+ * IRQ occurred. Pay attention to those device drivers if you
+ * run into the spurious IRQ condition below.
+ */
+ if (unlikely((irqnr & SPURIOUSIRQ_MASK) == SPURIOUSIRQ_MASK)) {
+ pr_err_once("%s: spurious irq!\n", __func__);
+ irq_err_count++;
+ omap_ack_irq(NULL);
+ return;
+ }
+
irqnr &= ACTIVEIRQ_MASK;
- WARN_ONCE(!irqnr, "Spurious IRQ ?\n");
handle_domain_irq(domain, irqnr, regs);
}
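
SPURIOUSIRQ_MASK above is 0x1ffffff shifted left by 7, i.e. bits 31..7, so the test fires only when the SIR read reports the all-ones spurious sort result rather than a valid active-IRQ number in bits 6..0. A standalone check of the mask logic:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t mask     = 0x1ffffffu << 7;	/* 0xffffff80 */
	uint32_t spurious = 0xffffffffu;	/* SIR during a spurious sort */
	uint32_t normal   = 0x2a;		/* active IRQ 42 in bits 6..0 */

	printf("%d %d\n",
	       (spurious & mask) == mask,	/* 1: take the early return */
	       (normal & mask) == mask);	/* 0: normal handling */
	return 0;
}
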
diff --git a/drivers/irqchip/irq-renesas-h8300h.c b/drivers/irqchip/irq-renesas-h8300h.c
index 6fd30d5ee14d..c378768d75b3 100644
--- a/drivers/irqchip/irq-renesas-h8300h.c
+++ b/drivers/irqchip/irq-renesas-h8300h.c
@@ -21,9 +21,9 @@ static const char ipr_bit[] = {
10, 10, 10, 10, 9, 9, 9, 9,
};
-static void *intc_baseaddr;
+static void __iomem *intc_baseaddr;
-#define IPR ((unsigned long)intc_baseaddr + 6)
+#define IPR (intc_baseaddr + 6)
static void h8300h_disable_irq(struct irq_data *data)
{
@@ -81,8 +81,8 @@ static int __init h8300h_intc_of_init(struct device_node *intc,
BUG_ON(!intc_baseaddr);
/* All interrupt priority low */
- ctrl_outb(0x00, IPR + 0);
- ctrl_outb(0x00, IPR + 1);
+ writeb(0x00, IPR + 0);
+ writeb(0x00, IPR + 1);
domain = irq_domain_add_linear(intc, NR_IRQS, &irq_ops, NULL);
BUG_ON(!domain);
diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c
index c325806561be..713177d97c7a 100644
--- a/drivers/irqchip/irq-renesas-intc-irqpin.c
+++ b/drivers/irqchip/irq-renesas-intc-irqpin.c
@@ -31,7 +31,6 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/of_device.h>
-#include <linux/platform_data/irq-renesas-intc-irqpin.h>
#include <linux/pm_runtime.h>
#define INTC_IRQPIN_MAX 8 /* maximum 8 interrupts per driver instance */
@@ -75,18 +74,20 @@ struct intc_irqpin_irq {
struct intc_irqpin_priv {
struct intc_irqpin_iomem iomem[INTC_IRQPIN_REG_NR];
struct intc_irqpin_irq irq[INTC_IRQPIN_MAX];
- struct renesas_intc_irqpin_config config;
- unsigned int number_of_irqs;
+ unsigned int sense_bitfield_width;
struct platform_device *pdev;
struct irq_chip irq_chip;
struct irq_domain *irq_domain;
struct clk *clk;
- bool shared_irqs;
+ unsigned shared_irqs:1;
+ unsigned needs_clk:1;
u8 shared_irq_mask;
};
-struct intc_irqpin_irlm_config {
+struct intc_irqpin_config {
unsigned int irlm_bit;
+ unsigned needs_irlm:1;
+ unsigned needs_clk:1;
};
static unsigned long intc_irqpin_read32(void __iomem *iomem)
@@ -171,7 +172,7 @@ static void intc_irqpin_mask_unmask_prio(struct intc_irqpin_priv *p,
static int intc_irqpin_set_sense(struct intc_irqpin_priv *p, int irq, int value)
{
/* The SENSE register is assumed to be 32-bit. */
- int bitfield_width = p->config.sense_bitfield_width;
+ int bitfield_width = p->sense_bitfield_width;
int shift = 32 - (irq + 1) * bitfield_width;
dev_dbg(&p->pdev->dev, "sense irq = %d, mode = %d\n", irq, value);
@@ -361,8 +362,15 @@ static const struct irq_domain_ops intc_irqpin_irq_domain_ops = {
.xlate = irq_domain_xlate_twocell,
};
-static const struct intc_irqpin_irlm_config intc_irqpin_irlm_r8a777x = {
+static const struct intc_irqpin_config intc_irqpin_irlm_r8a777x = {
.irlm_bit = 23, /* ICR0.IRLM0 */
+ .needs_irlm = 1,
+ .needs_clk = 0,
+};
+
+static const struct intc_irqpin_config intc_irqpin_rmobile = {
+ .needs_irlm = 0,
+ .needs_clk = 1,
};
static const struct of_device_id intc_irqpin_dt_ids[] = {
@@ -371,14 +379,18 @@ static const struct of_device_id intc_irqpin_dt_ids[] = {
.data = &intc_irqpin_irlm_r8a777x },
{ .compatible = "renesas,intc-irqpin-r8a7779",
.data = &intc_irqpin_irlm_r8a777x },
+ { .compatible = "renesas,intc-irqpin-r8a7740",
+ .data = &intc_irqpin_rmobile },
+ { .compatible = "renesas,intc-irqpin-sh73a0",
+ .data = &intc_irqpin_rmobile },
{},
};
MODULE_DEVICE_TABLE(of, intc_irqpin_dt_ids);
static int intc_irqpin_probe(struct platform_device *pdev)
{
+ const struct intc_irqpin_config *config = NULL;
struct device *dev = &pdev->dev;
- struct renesas_intc_irqpin_config *pdata = dev->platform_data;
const struct of_device_id *of_id;
struct intc_irqpin_priv *p;
struct intc_irqpin_iomem *i;
@@ -388,6 +400,8 @@ static int intc_irqpin_probe(struct platform_device *pdev)
void (*enable_fn)(struct irq_data *d);
void (*disable_fn)(struct irq_data *d);
const char *name = dev_name(dev);
+ bool control_parent;
+ unsigned int nirqs;
int ref_irq;
int ret;
int k;
@@ -399,23 +413,28 @@ static int intc_irqpin_probe(struct platform_device *pdev)
}
/* deal with driver instance configuration */
- if (pdata) {
- memcpy(&p->config, pdata, sizeof(*pdata));
- } else {
- of_property_read_u32(dev->of_node, "sense-bitfield-width",
- &p->config.sense_bitfield_width);
- p->config.control_parent = of_property_read_bool(dev->of_node,
- "control-parent");
- }
- if (!p->config.sense_bitfield_width)
- p->config.sense_bitfield_width = 4; /* default to 4 bits */
+ of_property_read_u32(dev->of_node, "sense-bitfield-width",
+ &p->sense_bitfield_width);
+ control_parent = of_property_read_bool(dev->of_node, "control-parent");
+ if (!p->sense_bitfield_width)
+ p->sense_bitfield_width = 4; /* default to 4 bits */
p->pdev = pdev;
platform_set_drvdata(pdev, p);
+ of_id = of_match_device(intc_irqpin_dt_ids, dev);
+ if (of_id && of_id->data) {
+ config = of_id->data;
+ p->needs_clk = config->needs_clk;
+ }
+
p->clk = devm_clk_get(dev, NULL);
if (IS_ERR(p->clk)) {
- dev_warn(dev, "unable to get clock\n");
+ if (p->needs_clk) {
+ dev_err(dev, "unable to get clock\n");
+ ret = PTR_ERR(p->clk);
+ goto err0;
+ }
p->clk = NULL;
}
@@ -443,8 +462,8 @@ static int intc_irqpin_probe(struct platform_device *pdev)
p->irq[k].requested_irq = irq->start;
}
- p->number_of_irqs = k;
- if (p->number_of_irqs < 1) {
+ nirqs = k;
+ if (nirqs < 1) {
dev_err(dev, "not enough IRQ resources\n");
ret = -EINVAL;
goto err0;
@@ -485,20 +504,16 @@ static int intc_irqpin_probe(struct platform_device *pdev)
}
/* configure "individual IRQ mode" where needed */
- of_id = of_match_device(intc_irqpin_dt_ids, dev);
- if (of_id && of_id->data) {
- const struct intc_irqpin_irlm_config *irlm_config = of_id->data;
-
+ if (config && config->needs_irlm) {
if (io[INTC_IRQPIN_REG_IRLM])
intc_irqpin_read_modify_write(p, INTC_IRQPIN_REG_IRLM,
- irlm_config->irlm_bit,
- 1, 1);
+ config->irlm_bit, 1, 1);
else
dev_warn(dev, "unable to select IRLM mode\n");
}
/* mask all interrupts using priority */
- for (k = 0; k < p->number_of_irqs; k++)
+ for (k = 0; k < nirqs; k++)
intc_irqpin_mask_unmask_prio(p, k, 1);
/* clear all pending interrupts */
@@ -506,16 +521,16 @@ static int intc_irqpin_probe(struct platform_device *pdev)
/* scan for shared interrupt lines */
ref_irq = p->irq[0].requested_irq;
- p->shared_irqs = true;
- for (k = 1; k < p->number_of_irqs; k++) {
+ p->shared_irqs = 1;
+ for (k = 1; k < nirqs; k++) {
if (ref_irq != p->irq[k].requested_irq) {
- p->shared_irqs = false;
+ p->shared_irqs = 0;
break;
}
}
/* use more severe masking method if requested */
- if (p->config.control_parent) {
+ if (control_parent) {
enable_fn = intc_irqpin_irq_enable_force;
disable_fn = intc_irqpin_irq_disable_force;
} else if (!p->shared_irqs) {
@@ -534,9 +549,7 @@ static int intc_irqpin_probe(struct platform_device *pdev)
irq_chip->irq_set_wake = intc_irqpin_irq_set_wake;
irq_chip->flags = IRQCHIP_MASK_ON_SUSPEND;
- p->irq_domain = irq_domain_add_simple(dev->of_node,
- p->number_of_irqs,
- p->config.irq_base,
+ p->irq_domain = irq_domain_add_simple(dev->of_node, nirqs, 0,
&intc_irqpin_irq_domain_ops, p);
if (!p->irq_domain) {
ret = -ENXIO;
@@ -555,7 +568,7 @@ static int intc_irqpin_probe(struct platform_device *pdev)
}
} else {
/* request interrupts one by one */
- for (k = 0; k < p->number_of_irqs; k++) {
+ for (k = 0; k < nirqs; k++) {
if (devm_request_irq(dev, p->irq[k].requested_irq,
intc_irqpin_irq_handler, 0, name,
&p->irq[k])) {
@@ -567,17 +580,10 @@ static int intc_irqpin_probe(struct platform_device *pdev)
}
/* unmask all interrupts on prio level */
- for (k = 0; k < p->number_of_irqs; k++)
+ for (k = 0; k < nirqs; k++)
intc_irqpin_mask_unmask_prio(p, k, 0);
- dev_info(dev, "driving %d irqs\n", p->number_of_irqs);
-
- /* warn in case of mismatch if irq base is specified */
- if (p->config.irq_base) {
- if (p->config.irq_base != p->irq[0].domain_irq)
- dev_warn(dev, "irq base mismatch (%d/%d)\n",
- p->config.irq_base, p->irq[0].domain_irq);
- }
+ dev_info(dev, "driving %d irqs\n", nirqs);
return 0;
diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c
index 4ef178078e5b..0820f67cc9a7 100644
--- a/drivers/irqchip/irq-sunxi-nmi.c
+++ b/drivers/irqchip/irq-sunxi-nmi.c
@@ -50,6 +50,12 @@ static struct sunxi_sc_nmi_reg_offs sun6i_reg_offs = {
.enable = 0x34,
};
+static struct sunxi_sc_nmi_reg_offs sun9i_reg_offs = {
+ .ctrl = 0x00,
+ .pend = 0x08,
+ .enable = 0x04,
+};
+
static inline void sunxi_sc_nmi_write(struct irq_chip_generic *gc, u32 off,
u32 val)
{
@@ -207,3 +213,10 @@ static int __init sun7i_sc_nmi_irq_init(struct device_node *node,
return sunxi_sc_nmi_irq_init(node, &sun7i_reg_offs);
}
IRQCHIP_DECLARE(sun7i_sc_nmi, "allwinner,sun7i-a20-sc-nmi", sun7i_sc_nmi_irq_init);
+
+static int __init sun9i_nmi_irq_init(struct device_node *node,
+ struct device_node *parent)
+{
+ return sunxi_sc_nmi_irq_init(node, &sun9i_reg_offs);
+}
+IRQCHIP_DECLARE(sun9i_nmi, "allwinner,sun9i-a80-nmi", sun9i_nmi_irq_init);
diff --git a/drivers/irqchip/irq-ts4800.c b/drivers/irqchip/irq-ts4800.c
new file mode 100644
index 000000000000..4192bdcd2734
--- /dev/null
+++ b/drivers/irqchip/irq-ts4800.c
@@ -0,0 +1,163 @@
+/*
+ * Multiplexed-IRQs driver for TS-4800's FPGA
+ *
+ * Copyright (c) 2015 - Savoir-faire Linux
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
+#define IRQ_MASK 0x4
+#define IRQ_STATUS 0x8
+
+struct ts4800_irq_data {
+ void __iomem *base;
+ struct irq_domain *domain;
+ struct irq_chip irq_chip;
+};
+
+static void ts4800_irq_mask(struct irq_data *d)
+{
+ struct ts4800_irq_data *data = irq_data_get_irq_chip_data(d);
+ u16 reg = readw(data->base + IRQ_MASK);
+ u16 mask = 1 << d->hwirq;
+
+ writew(reg | mask, data->base + IRQ_MASK);
+}
+
+static void ts4800_irq_unmask(struct irq_data *d)
+{
+ struct ts4800_irq_data *data = irq_data_get_irq_chip_data(d);
+ u16 reg = readw(data->base + IRQ_MASK);
+ u16 mask = 1 << d->hwirq;
+
+ writew(reg & ~mask, data->base + IRQ_MASK);
+}
+
+static int ts4800_irqdomain_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hwirq)
+{
+ struct ts4800_irq_data *data = d->host_data;
+
+ irq_set_chip_and_handler(irq, &data->irq_chip, handle_simple_irq);
+ irq_set_chip_data(irq, data);
+ irq_set_noprobe(irq);
+
+ return 0;
+}
+
+static struct irq_domain_ops ts4800_ic_ops = {
+ .map = ts4800_irqdomain_map,
+ .xlate = irq_domain_xlate_onecell,
+};
+
+static void ts4800_ic_chained_handle_irq(struct irq_desc *desc)
+{
+ struct ts4800_irq_data *data = irq_desc_get_handler_data(desc);
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ u16 status = readw(data->base + IRQ_STATUS);
+
+ chained_irq_enter(chip, desc);
+
+ if (unlikely(status == 0)) {
+ handle_bad_irq(desc);
+ goto out;
+ }
+
+ do {
+ unsigned int bit = __ffs(status);
+ int irq = irq_find_mapping(data->domain, bit);
+
+ status &= ~(1 << bit);
+ generic_handle_irq(irq);
+ } while (status);
+
+out:
+ chained_irq_exit(chip, desc);
+}
+
+static int ts4800_ic_probe(struct platform_device *pdev)
+{
+ struct device_node *node = pdev->dev.of_node;
+ struct ts4800_irq_data *data;
+ struct irq_chip *irq_chip;
+ struct resource *res;
+ int parent_irq;
+
+ data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ data->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(data->base))
+ return PTR_ERR(data->base);
+
+ writew(0xFFFF, data->base + IRQ_MASK);
+
+ parent_irq = irq_of_parse_and_map(node, 0);
+ if (!parent_irq) {
+ dev_err(&pdev->dev, "failed to get parent IRQ\n");
+ return -EINVAL;
+ }
+
+ irq_chip = &data->irq_chip;
+ irq_chip->name = dev_name(&pdev->dev);
+ irq_chip->irq_mask = ts4800_irq_mask;
+ irq_chip->irq_unmask = ts4800_irq_unmask;
+
+ data->domain = irq_domain_add_linear(node, 8, &ts4800_ic_ops, data);
+ if (!data->domain) {
+ dev_err(&pdev->dev, "cannot add IRQ domain\n");
+ return -ENOMEM;
+ }
+
+ irq_set_chained_handler_and_data(parent_irq,
+ ts4800_ic_chained_handle_irq, data);
+
+ platform_set_drvdata(pdev, data);
+
+ return 0;
+}
+
+static int ts4800_ic_remove(struct platform_device *pdev)
+{
+ struct ts4800_irq_data *data = platform_get_drvdata(pdev);
+
+ irq_domain_remove(data->domain);
+
+ return 0;
+}
+
+static const struct of_device_id ts4800_ic_of_match[] = {
+ { .compatible = "technologic,ts4800-irqc", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, ts4800_ic_of_match);
+
+static struct platform_driver ts4800_ic_driver = {
+ .probe = ts4800_ic_probe,
+ .remove = ts4800_ic_remove,
+ .driver = {
+ .name = "ts4800-irqc",
+ .of_match_table = ts4800_ic_of_match,
+ },
+};
+module_platform_driver(ts4800_ic_driver);
+
+MODULE_AUTHOR("Damien Riegel <damien.riegel@savoirfairelinux.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:ts4800_irqc");
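The heart of the new driver is the chained handler's demux loop: read the latched 16-bit status word once, then peel off one set bit per iteration until the word is empty. A minimal, runnable illustration of that idiom in plain C (status value invented; __builtin_ctz stands in for the kernel's __ffs):

#include <stdio.h>

int main(void)
{
	unsigned int status = 0x0112;	/* pretend IRQ_STATUS readout */

	while (status) {
		unsigned int bit = __builtin_ctz(status);

		status &= ~(1u << bit);
		printf("dispatch hwirq %u\n", bit);	/* generic_handle_irq() */
	}
	return 0;
}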
diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c
index 598ab3f0e0ac..cadf104e3074 100644
--- a/drivers/irqchip/irq-versatile-fpga.c
+++ b/drivers/irqchip/irq-versatile-fpga.c
@@ -210,7 +210,12 @@ int __init fpga_irq_of_init(struct device_node *node,
parent_irq = -1;
}
+#ifdef CONFIG_ARCH_VERSATILE
+ fpga_irq_init(base, node->name, IRQ_SIC_START, parent_irq, valid_mask,
+ node);
+#else
fpga_irq_init(base, node->name, 0, parent_irq, valid_mask, node);
+#endif
writel(clear_mask, base + IRQ_ENABLE_CLEAR);
writel(clear_mask, base + FIQ_ENABLE_CLEAR);
diff --git a/drivers/irqchip/irq-zevio.c b/drivers/irqchip/irq-zevio.c
index 4c48fa88a03d..cb9d8ec37507 100644
--- a/drivers/irqchip/irq-zevio.c
+++ b/drivers/irqchip/irq-zevio.c
@@ -43,8 +43,7 @@ static void __iomem *zevio_irq_io;
static void zevio_irq_ack(struct irq_data *irqd)
{
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(irqd);
- struct irq_chip_regs *regs =
- &container_of(irqd->chip, struct irq_chip_type, chip)->regs;
+ struct irq_chip_regs *regs = &irq_data_get_chip_type(irqd)->regs;
readl(gc->reg_base + regs->ack);
}
diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c
index 375be509e95f..2a506fe0c8a4 100644
--- a/drivers/isdn/gigaset/ser-gigaset.c
+++ b/drivers/isdn/gigaset/ser-gigaset.c
@@ -67,8 +67,7 @@ static int write_modem(struct cardstate *cs)
struct sk_buff *skb = bcs->tx_skb;
int sent = -EOPNOTSUPP;
- if (!tty || !tty->driver || !skb)
- return -EINVAL;
+ WARN_ON(!tty || !tty->ops || !skb);
if (!skb->len) {
dev_kfree_skb_any(skb);
@@ -109,8 +108,7 @@ static int send_cb(struct cardstate *cs)
unsigned long flags;
int sent = 0;
- if (!tty || !tty->driver)
- return -EFAULT;
+ WARN_ON(!tty || !tty->ops);
cb = cs->cmdbuf;
if (!cb)
@@ -370,19 +368,18 @@ static void gigaset_freecshw(struct cardstate *cs)
tasklet_kill(&cs->write_tasklet);
if (!cs->hw.ser)
return;
- dev_set_drvdata(&cs->hw.ser->dev.dev, NULL);
platform_device_unregister(&cs->hw.ser->dev);
- kfree(cs->hw.ser);
- cs->hw.ser = NULL;
}
static void gigaset_device_release(struct device *dev)
{
- struct platform_device *pdev = to_platform_device(dev);
+ struct cardstate *cs = dev_get_drvdata(dev);
- /* adapted from platform_device_release() in drivers/base/platform.c */
- kfree(dev->platform_data);
- kfree(pdev->resource);
+ if (!cs)
+ return;
+ dev_set_drvdata(dev, NULL);
+ kfree(cs->hw.ser);
+ cs->hw.ser = NULL;
}
/*
@@ -432,7 +429,9 @@ static int gigaset_set_modem_ctrl(struct cardstate *cs, unsigned old_state,
struct tty_struct *tty = cs->hw.ser->tty;
unsigned int set, clear;
- if (!tty || !tty->driver || !tty->ops->tiocmset)
+ WARN_ON(!tty || !tty->ops);
+ /* tiocmset is an optional tty driver method */
+ if (!tty->ops->tiocmset)
return -EINVAL;
set = new_state & ~old_state;
clear = old_state & ~new_state;
diff --git a/drivers/isdn/hardware/mISDN/mISDNipac.c b/drivers/isdn/hardware/mISDN/mISDNipac.c
index a77eea594b69..cb428b9ee441 100644
--- a/drivers/isdn/hardware/mISDN/mISDNipac.c
+++ b/drivers/isdn/hardware/mISDN/mISDNipac.c
@@ -1170,7 +1170,7 @@ mISDNipac_irq(struct ipac_hw *ipac, int maxloop)
if (ipac->type & IPAC_TYPE_IPACX) {
ista = ReadIPAC(ipac, ISACX_ISTA);
- while (ista && cnt--) {
+ while (ista && --cnt) {
pr_debug("%s: ISTA %02x\n", ipac->name, ista);
if (ista & IPACX__ICA)
ipac_irq(&ipac->hscx[0], ista);
@@ -1182,7 +1182,7 @@ mISDNipac_irq(struct ipac_hw *ipac, int maxloop)
}
} else if (ipac->type & IPAC_TYPE_IPAC) {
ista = ReadIPAC(ipac, IPAC_ISTA);
- while (ista && cnt--) {
+ while (ista && --cnt) {
pr_debug("%s: ISTA %02x\n", ipac->name, ista);
if (ista & (IPAC__ICD | IPAC__EXD)) {
istad = ReadISAC(isac, ISAC_ISTA);
@@ -1200,7 +1200,7 @@ mISDNipac_irq(struct ipac_hw *ipac, int maxloop)
ista = ReadIPAC(ipac, IPAC_ISTA);
}
} else if (ipac->type & IPAC_TYPE_HSCX) {
- while (cnt) {
+ while (--cnt) {
ista = ReadIPAC(ipac, IPAC_ISTAB + ipac->hscx[1].off);
pr_debug("%s: B2 ISTA %02x\n", ipac->name, ista);
if (ista)
@@ -1211,7 +1211,6 @@ mISDNipac_irq(struct ipac_hw *ipac, int maxloop)
mISDNisac_irq(isac, istad);
if (0 == (ista | istad))
break;
- cnt--;
}
}
if (cnt > maxloop) /* only for ISAC/HSCX without PCI IRQ test */
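The three conversions from "cnt--" to "--cnt" (plus dropping the trailing "cnt--" in the HSCX branch) change both how many passes run and what value cnt holds when the loops exit, which matters to the "cnt > maxloop" spurious-interrupt check that follows. A standalone illustration of the difference:

#include <stdio.h>

int main(void)
{
	int cnt = 3;

	while (cnt--)	/* body runs 3 times; exits with cnt == -1 */
		;
	printf("post-decrement leaves cnt = %d\n", cnt);

	cnt = 3;
	while (--cnt)	/* body runs 2 times; exits with cnt == 0 */
		;
	printf("pre-decrement leaves cnt = %d\n", cnt);
	return 0;
}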
diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c
index b33f53b3ca93..bf04d2a3cf4a 100644
--- a/drivers/isdn/hisax/config.c
+++ b/drivers/isdn/hisax/config.c
@@ -1896,7 +1896,7 @@ static void EChannel_proc_rcv(struct hisax_d_if *d_if)
ptr--;
*ptr++ = '\n';
*ptr = 0;
- HiSax_putstatus(cs, NULL, "%s", cs->dlog);
+ HiSax_putstatus(cs, NULL, cs->dlog);
} else
HiSax_putstatus(cs, "LogEcho: ",
"warning Frame too big (%d)",
diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c
index 4a4825528188..90449e1e91e5 100644
--- a/drivers/isdn/hisax/hfc_pci.c
+++ b/drivers/isdn/hisax/hfc_pci.c
@@ -901,7 +901,7 @@ Begin:
ptr--;
*ptr++ = '\n';
*ptr = 0;
- HiSax_putstatus(cs, NULL, "%s", cs->dlog);
+ HiSax_putstatus(cs, NULL, cs->dlog);
} else
HiSax_putstatus(cs, "LogEcho: ", "warning Frame too big (%d)", total - 3);
}
diff --git a/drivers/isdn/hisax/hfc_sx.c b/drivers/isdn/hisax/hfc_sx.c
index b1fad81f0722..13b2151c10f5 100644
--- a/drivers/isdn/hisax/hfc_sx.c
+++ b/drivers/isdn/hisax/hfc_sx.c
@@ -674,7 +674,7 @@ receive_emsg(struct IsdnCardState *cs)
ptr--;
*ptr++ = '\n';
*ptr = 0;
- HiSax_putstatus(cs, NULL, "%s", cs->dlog);
+ HiSax_putstatus(cs, NULL, cs->dlog);
} else
HiSax_putstatus(cs, "LogEcho: ", "warning Frame too big (%d)", skb->len);
}
diff --git a/drivers/isdn/hisax/q931.c b/drivers/isdn/hisax/q931.c
index b420f8bd862e..ba4beb25d872 100644
--- a/drivers/isdn/hisax/q931.c
+++ b/drivers/isdn/hisax/q931.c
@@ -1179,7 +1179,7 @@ LogFrame(struct IsdnCardState *cs, u_char *buf, int size)
dp--;
*dp++ = '\n';
*dp = 0;
- HiSax_putstatus(cs, NULL, "%s", cs->dlog);
+ HiSax_putstatus(cs, NULL, cs->dlog);
} else
HiSax_putstatus(cs, "LogFrame: ", "warning Frame too big (%d)", size);
}
@@ -1246,7 +1246,7 @@ dlogframe(struct IsdnCardState *cs, struct sk_buff *skb, int dir)
}
if (finish) {
*dp = 0;
- HiSax_putstatus(cs, NULL, "%s", cs->dlog);
+ HiSax_putstatus(cs, NULL, cs->dlog);
return;
}
if ((0xfe & buf[0]) == PROTO_DIS_N0) { /* 1TR6 */
@@ -1509,5 +1509,5 @@ dlogframe(struct IsdnCardState *cs, struct sk_buff *skb, int dir)
dp += sprintf(dp, "Unknown protocol %x!", buf[0]);
}
*dp = 0;
- HiSax_putstatus(cs, NULL, "%s", cs->dlog);
+ HiSax_putstatus(cs, NULL, cs->dlog);
}
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
index a16bf56d3f28..85a339030e4b 100644
--- a/drivers/lightnvm/Kconfig
+++ b/drivers/lightnvm/Kconfig
@@ -18,6 +18,7 @@ if NVM
config NVM_DEBUG
bool "Open-Channel SSD debugging support"
+ default n
---help---
Exposes a debug management interface to create/remove targets at:
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index f659e605a406..8f41b245cd55 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -74,7 +74,7 @@ EXPORT_SYMBOL(nvm_unregister_target);
void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
dma_addr_t *dma_handler)
{
- return dev->ops->dev_dma_alloc(dev->q, dev->ppalist_pool, mem_flags,
+ return dev->ops->dev_dma_alloc(dev, dev->ppalist_pool, mem_flags,
dma_handler);
}
EXPORT_SYMBOL(nvm_dev_dma_alloc);
@@ -97,15 +97,47 @@ static struct nvmm_type *nvm_find_mgr_type(const char *name)
return NULL;
}
+struct nvmm_type *nvm_init_mgr(struct nvm_dev *dev)
+{
+ struct nvmm_type *mt;
+ int ret;
+
+ lockdep_assert_held(&nvm_lock);
+
+ list_for_each_entry(mt, &nvm_mgrs, list) {
+ ret = mt->register_mgr(dev);
+ if (ret < 0) {
+ pr_err("nvm: media mgr failed to init (%d) on dev %s\n",
+ ret, dev->name);
+ return NULL; /* initialization failed */
+ } else if (ret > 0)
+ return mt;
+ }
+
+ return NULL;
+}
+
int nvm_register_mgr(struct nvmm_type *mt)
{
+ struct nvm_dev *dev;
int ret = 0;
down_write(&nvm_lock);
- if (nvm_find_mgr_type(mt->name))
+ if (nvm_find_mgr_type(mt->name)) {
ret = -EEXIST;
- else
+ goto finish;
+ } else {
list_add(&mt->list, &nvm_mgrs);
+ }
+
+	/* try to register media mgr if any device has none configured */
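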
+ list_for_each_entry(dev, &nvm_devices, devices) {
+ if (dev->mt)
+ continue;
+
+ dev->mt = nvm_init_mgr(dev);
+ }
+finish:
up_write(&nvm_lock);
return ret;
@@ -160,11 +192,6 @@ int nvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk)
}
EXPORT_SYMBOL(nvm_erase_blk);
-static void nvm_core_free(struct nvm_dev *dev)
-{
- kfree(dev);
-}
-
static int nvm_core_init(struct nvm_dev *dev)
{
struct nvm_id *id = &dev->identity;
@@ -179,12 +206,21 @@ static int nvm_core_init(struct nvm_dev *dev)
dev->sec_size = grp->csecs;
dev->oob_size = grp->sos;
dev->sec_per_pg = grp->fpg_sz / grp->csecs;
- dev->addr_mode = id->ppat;
- dev->addr_format = id->ppaf;
+ memcpy(&dev->ppaf, &id->ppaf, sizeof(struct nvm_addr_format));
dev->plane_mode = NVM_PLANE_SINGLE;
dev->max_rq_size = dev->ops->max_phys_sect * dev->sec_size;
+ if (grp->mtype != 0) {
+ pr_err("nvm: memory type not supported\n");
+ return -EINVAL;
+ }
+
+ if (grp->fmtype != 0 && grp->fmtype != 1) {
+ pr_err("nvm: flash type not supported\n");
+ return -EINVAL;
+ }
+
if (grp->mpos & 0x020202)
dev->plane_mode = NVM_PLANE_DOUBLE;
if (grp->mpos & 0x040404)
@@ -213,21 +249,17 @@ static void nvm_free(struct nvm_dev *dev)
if (dev->mt)
dev->mt->unregister_mgr(dev);
-
- nvm_core_free(dev);
}
static int nvm_init(struct nvm_dev *dev)
{
- struct nvmm_type *mt;
- int ret = 0;
+ int ret = -EINVAL;
if (!dev->q || !dev->ops)
- return -EINVAL;
+ return ret;
- if (dev->ops->identity(dev->q, &dev->identity)) {
+ if (dev->ops->identity(dev, &dev->identity)) {
pr_err("nvm: device could not be identified\n");
- ret = -EINVAL;
goto err;
}
@@ -251,29 +283,12 @@ static int nvm_init(struct nvm_dev *dev)
goto err;
}
- /* register with device with a supported manager */
- list_for_each_entry(mt, &nvm_mgrs, list) {
- ret = mt->register_mgr(dev);
- if (ret < 0)
- goto err; /* initialization failed */
- if (ret > 0) {
- dev->mt = mt;
- break; /* successfully initialized */
- }
- }
-
- if (!ret) {
- pr_info("nvm: no compatible manager found.\n");
- return 0;
- }
-
pr_info("nvm: registered %s [%u/%u/%u/%u/%u/%u]\n",
dev->name, dev->sec_per_pg, dev->nr_planes,
dev->pgs_per_blk, dev->blks_per_lun, dev->nr_luns,
dev->nr_chnls);
return 0;
err:
- nvm_free(dev);
pr_err("nvm: failed to initialize nvm\n");
return ret;
}
@@ -308,22 +323,27 @@ int nvm_register(struct request_queue *q, char *disk_name,
if (ret)
goto err_init;
- down_write(&nvm_lock);
- list_add(&dev->devices, &nvm_devices);
- up_write(&nvm_lock);
+ if (dev->ops->max_phys_sect > 256) {
+ pr_info("nvm: max sectors supported is 256.\n");
+ ret = -EINVAL;
+ goto err_init;
+ }
if (dev->ops->max_phys_sect > 1) {
- dev->ppalist_pool = dev->ops->create_dma_pool(dev->q,
- "ppalist");
+ dev->ppalist_pool = dev->ops->create_dma_pool(dev, "ppalist");
if (!dev->ppalist_pool) {
pr_err("nvm: could not create ppa pool\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err_init;
}
- } else if (dev->ops->max_phys_sect > 256) {
- pr_info("nvm: max sectors supported is 256.\n");
- return -EINVAL;
}
+ /* register device with a supported media manager */
+ down_write(&nvm_lock);
+ dev->mt = nvm_init_mgr(dev);
+ list_add(&dev->devices, &nvm_devices);
+ up_write(&nvm_lock);
+
return 0;
err_init:
kfree(dev);
@@ -333,19 +353,22 @@ EXPORT_SYMBOL(nvm_register);
void nvm_unregister(char *disk_name)
{
- struct nvm_dev *dev = nvm_find_nvm_dev(disk_name);
+ struct nvm_dev *dev;
+ down_write(&nvm_lock);
+ dev = nvm_find_nvm_dev(disk_name);
if (!dev) {
pr_err("nvm: could not find device %s to unregister\n",
disk_name);
+ up_write(&nvm_lock);
return;
}
- nvm_exit(dev);
-
- down_write(&nvm_lock);
list_del(&dev->devices);
up_write(&nvm_lock);
+
+ nvm_exit(dev);
+ kfree(dev);
}
EXPORT_SYMBOL(nvm_unregister);
@@ -358,38 +381,24 @@ static int nvm_create_target(struct nvm_dev *dev,
{
struct nvm_ioctl_create_simple *s = &create->conf.s;
struct request_queue *tqueue;
- struct nvmm_type *mt;
struct gendisk *tdisk;
struct nvm_tgt_type *tt;
struct nvm_target *t;
void *targetdata;
- int ret = 0;
if (!dev->mt) {
- /* register with device with a supported NVM manager */
- list_for_each_entry(mt, &nvm_mgrs, list) {
- ret = mt->register_mgr(dev);
- if (ret < 0)
- return ret; /* initialization failed */
- if (ret > 0) {
- dev->mt = mt;
- break; /* successfully initialized */
- }
- }
-
- if (!ret) {
- pr_info("nvm: no compatible nvm manager found.\n");
- return -ENODEV;
- }
+ pr_info("nvm: device has no media manager registered.\n");
+ return -ENODEV;
}
+ down_write(&nvm_lock);
tt = nvm_find_target_type(create->tgttype);
if (!tt) {
pr_err("nvm: target type %s not found\n", create->tgttype);
+ up_write(&nvm_lock);
return -EINVAL;
}
- down_write(&nvm_lock);
list_for_each_entry(t, &dev->online_targets, list) {
if (!strcmp(create->tgtname, t->disk->disk_name)) {
pr_err("nvm: target name already exists.\n");
@@ -457,11 +466,11 @@ static void nvm_remove_target(struct nvm_target *t)
lockdep_assert_held(&nvm_lock);
del_gendisk(tdisk);
+ blk_cleanup_queue(q);
+
if (tt->exit)
tt->exit(tdisk->private_data);
- blk_cleanup_queue(q);
-
put_disk(tdisk);
list_del(&t->list);
@@ -473,7 +482,9 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create)
struct nvm_dev *dev;
struct nvm_ioctl_create_simple *s;
+ down_write(&nvm_lock);
dev = nvm_find_nvm_dev(create->dev);
+ up_write(&nvm_lock);
if (!dev) {
pr_err("nvm: device not found\n");
return -EINVAL;
@@ -532,7 +543,9 @@ static int nvm_configure_show(const char *val)
return -EINVAL;
}
+ down_write(&nvm_lock);
dev = nvm_find_nvm_dev(devname);
+ up_write(&nvm_lock);
if (!dev) {
pr_err("nvm: device not found\n");
return -EINVAL;
@@ -541,7 +554,7 @@ static int nvm_configure_show(const char *val)
if (!dev->mt)
return 0;
- dev->mt->free_blocks_print(dev);
+ dev->mt->lun_info_print(dev);
return 0;
}
@@ -677,8 +690,10 @@ static long nvm_ioctl_info(struct file *file, void __user *arg)
info->tgtsize = tgt_iter;
up_write(&nvm_lock);
- if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info)))
+ if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info))) {
+ kfree(info);
return -EFAULT;
+ }
kfree(info);
return 0;
@@ -721,8 +736,11 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
devices->nr_devices = i;
- if (copy_to_user(arg, devices, sizeof(struct nvm_ioctl_get_devices)))
+ if (copy_to_user(arg, devices,
+ sizeof(struct nvm_ioctl_get_devices))) {
+ kfree(devices);
return -EFAULT;
+ }
kfree(devices);
return 0;
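Both ioctl fixes in this hunk enforce the same rule: a buffer allocated inside the handler must be freed on every exit path, including the copy_to_user() failure path that previously leaked it. A minimal sketch of the pattern, with a single exit and a hypothetical payload struct:

static long example_ioctl_info(void __user *arg)
{
	struct example_info *info;	/* hypothetical payload */
	long ret = 0;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return -ENOMEM;

	/* ... fill *info ... */

	if (copy_to_user(arg, info, sizeof(*info)))
		ret = -EFAULT;

	kfree(info);		/* reached on success and on -EFAULT alike */
	return ret;
}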
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index ae1fb2bdc5f4..a54b339951a3 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -60,23 +60,27 @@ static int gennvm_luns_init(struct nvm_dev *dev, struct gen_nvm *gn)
lun->vlun.lun_id = i % dev->luns_per_chnl;
lun->vlun.chnl_id = i / dev->luns_per_chnl;
lun->vlun.nr_free_blocks = dev->blks_per_lun;
+ lun->vlun.nr_inuse_blocks = 0;
+ lun->vlun.nr_bad_blocks = 0;
}
return 0;
}
-static int gennvm_block_bb(u32 lun_id, void *bb_bitmap, unsigned int nr_blocks,
+static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks,
void *private)
{
struct gen_nvm *gn = private;
- struct gen_lun *lun = &gn->luns[lun_id];
+ struct nvm_dev *dev = gn->dev;
+ struct gen_lun *lun;
struct nvm_block *blk;
int i;
- if (unlikely(bitmap_empty(bb_bitmap, nr_blocks)))
- return 0;
+ lun = &gn->luns[(dev->luns_per_chnl * ppa.g.ch) + ppa.g.lun];
+
+ for (i = 0; i < nr_blocks; i++) {
+ if (blks[i] == 0)
+ continue;
- i = -1;
- while ((i = find_next_bit(bb_bitmap, nr_blocks, i + 1)) < nr_blocks) {
blk = &lun->vlun.blocks[i];
if (!blk) {
pr_err("gennvm: BB data is out of bounds.\n");
@@ -84,6 +88,7 @@ static int gennvm_block_bb(u32 lun_id, void *bb_bitmap, unsigned int nr_blocks,
}
list_move_tail(&blk->list, &lun->bb_list);
+ lun->vlun.nr_bad_blocks++;
}
return 0;
@@ -136,6 +141,7 @@ static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private)
list_move_tail(&blk->list, &lun->used_list);
blk->type = 1;
lun->vlun.nr_free_blocks--;
+ lun->vlun.nr_inuse_blocks++;
}
}
@@ -164,22 +170,32 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn)
block->id = cur_block_id++;
/* First block is reserved for device */
- if (unlikely(lun_iter == 0 && blk_iter == 0))
+ if (unlikely(lun_iter == 0 && blk_iter == 0)) {
+ lun->vlun.nr_free_blocks--;
continue;
+ }
list_add_tail(&block->list, &lun->free_list);
}
if (dev->ops->get_bb_tbl) {
- ret = dev->ops->get_bb_tbl(dev->q, lun->vlun.id,
- dev->blks_per_lun, gennvm_block_bb, gn);
+ struct ppa_addr ppa;
+
+ ppa.ppa = 0;
+ ppa.g.ch = lun->vlun.chnl_id;
+ ppa.g.lun = lun->vlun.id;
+ ppa = generic_to_dev_addr(dev, ppa);
+
+ ret = dev->ops->get_bb_tbl(dev, ppa,
+ dev->blks_per_lun,
+ gennvm_block_bb, gn);
if (ret)
pr_err("gennvm: could not read BB table\n");
}
}
if (dev->ops->get_l2p_tbl) {
- ret = dev->ops->get_l2p_tbl(dev->q, 0, dev->total_pages,
+ ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_pages,
gennvm_block_map, dev);
if (ret) {
pr_err("gennvm: could not read L2P table.\n");
@@ -190,15 +206,27 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn)
return 0;
}
+static void gennvm_free(struct nvm_dev *dev)
+{
+ gennvm_blocks_free(dev);
+ gennvm_luns_free(dev);
+ kfree(dev->mp);
+ dev->mp = NULL;
+}
+
static int gennvm_register(struct nvm_dev *dev)
{
struct gen_nvm *gn;
int ret;
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
gn = kzalloc(sizeof(struct gen_nvm), GFP_KERNEL);
if (!gn)
return -ENOMEM;
+ gn->dev = dev;
gn->nr_luns = dev->nr_luns;
dev->mp = gn;
@@ -216,16 +244,15 @@ static int gennvm_register(struct nvm_dev *dev)
return 1;
err:
- kfree(gn);
+ gennvm_free(dev);
+ module_put(THIS_MODULE);
return ret;
}
static void gennvm_unregister(struct nvm_dev *dev)
{
- gennvm_blocks_free(dev);
- gennvm_luns_free(dev);
- kfree(dev->mp);
- dev->mp = NULL;
+ gennvm_free(dev);
+ module_put(THIS_MODULE);
}
static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
@@ -240,23 +267,21 @@ static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
if (list_empty(&lun->free_list)) {
pr_err_ratelimited("gennvm: lun %u have no free pages available",
lun->vlun.id);
- spin_unlock(&vlun->lock);
goto out;
}
- while (!is_gc && lun->vlun.nr_free_blocks < lun->reserved_blocks) {
- spin_unlock(&vlun->lock);
+ if (!is_gc && lun->vlun.nr_free_blocks < lun->reserved_blocks)
goto out;
- }
blk = list_first_entry(&lun->free_list, struct nvm_block, list);
list_move_tail(&blk->list, &lun->used_list);
blk->type = 1;
lun->vlun.nr_free_blocks--;
+ lun->vlun.nr_inuse_blocks++;
- spin_unlock(&vlun->lock);
out:
+ spin_unlock(&vlun->lock);
return blk;
}
@@ -271,16 +296,21 @@ static void gennvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
case 1:
list_move_tail(&blk->list, &lun->free_list);
lun->vlun.nr_free_blocks++;
+ lun->vlun.nr_inuse_blocks--;
blk->type = 0;
break;
case 2:
list_move_tail(&blk->list, &lun->bb_list);
+ lun->vlun.nr_bad_blocks++;
+ lun->vlun.nr_inuse_blocks--;
break;
default:
WARN_ON_ONCE(1);
pr_err("gennvm: erroneous block type (%lu -> %u)\n",
blk->id, blk->type);
list_move_tail(&blk->list, &lun->bb_list);
+ lun->vlun.nr_bad_blocks++;
+ lun->vlun.nr_inuse_blocks--;
}
spin_unlock(&vlun->lock);
@@ -292,10 +322,10 @@ static void gennvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
if (rqd->nr_pages > 1) {
for (i = 0; i < rqd->nr_pages; i++)
- rqd->ppa_list[i] = addr_to_generic_mode(dev,
+ rqd->ppa_list[i] = dev_to_generic_addr(dev,
rqd->ppa_list[i]);
} else {
- rqd->ppa_addr = addr_to_generic_mode(dev, rqd->ppa_addr);
+ rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr);
}
}
@@ -305,10 +335,10 @@ static void gennvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
if (rqd->nr_pages > 1) {
for (i = 0; i < rqd->nr_pages; i++)
- rqd->ppa_list[i] = generic_to_addr_mode(dev,
+ rqd->ppa_list[i] = generic_to_dev_addr(dev,
rqd->ppa_list[i]);
} else {
- rqd->ppa_addr = generic_to_addr_mode(dev, rqd->ppa_addr);
+ rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr);
}
}
@@ -321,7 +351,7 @@ static int gennvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
gennvm_generic_to_addr_mode(dev, rqd);
rqd->dev = dev;
- return dev->ops->submit_io(dev->q, rqd);
+ return dev->ops->submit_io(dev, rqd);
}
static void gennvm_blk_set_type(struct nvm_dev *dev, struct ppa_addr *ppa,
@@ -354,10 +384,10 @@ static void gennvm_mark_blk_bad(struct nvm_dev *dev, struct nvm_rq *rqd)
{
int i;
- if (!dev->ops->set_bb)
+ if (!dev->ops->set_bb_tbl)
return;
- if (dev->ops->set_bb(dev->q, rqd, 1))
+ if (dev->ops->set_bb_tbl(dev, rqd, 1))
return;
gennvm_addr_to_generic_mode(dev, rqd);
@@ -425,7 +455,7 @@ static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
gennvm_generic_to_addr_mode(dev, &rqd);
- ret = dev->ops->erase_block(dev->q, &rqd);
+ ret = dev->ops->erase_block(dev, &rqd);
if (plane_cnt)
nvm_dev_dma_free(dev, rqd.ppa_list, rqd.dma_ppa_list);
@@ -440,15 +470,24 @@ static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid)
return &gn->luns[lunid].vlun;
}
-static void gennvm_free_blocks_print(struct nvm_dev *dev)
+static void gennvm_lun_info_print(struct nvm_dev *dev)
{
struct gen_nvm *gn = dev->mp;
struct gen_lun *lun;
unsigned int i;
- gennvm_for_each_lun(gn, lun, i)
- pr_info("%s: lun%8u\t%u\n",
- dev->name, i, lun->vlun.nr_free_blocks);
+
+ gennvm_for_each_lun(gn, lun, i) {
+ spin_lock(&lun->vlun.lock);
+
+ pr_info("%s: lun%8u\t%u\t%u\t%u\n",
+ dev->name, i,
+ lun->vlun.nr_free_blocks,
+ lun->vlun.nr_inuse_blocks,
+ lun->vlun.nr_bad_blocks);
+
+ spin_unlock(&lun->vlun.lock);
+ }
}
static struct nvmm_type gennvm = {
@@ -466,7 +505,7 @@ static struct nvmm_type gennvm = {
.erase_blk = gennvm_erase_blk,
.get_lun = gennvm_get_lun,
- .free_blocks_print = gennvm_free_blocks_print,
+ .lun_info_print = gennvm_lun_info_print,
};
static int __init gennvm_module_init(void)
diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h
index d23bd3501ddc..9c24b5b32dac 100644
--- a/drivers/lightnvm/gennvm.h
+++ b/drivers/lightnvm/gennvm.h
@@ -35,6 +35,8 @@ struct gen_lun {
};
struct gen_nvm {
+ struct nvm_dev *dev;
+
int nr_luns;
struct gen_lun *luns;
};
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 7ba64c87ba1c..134e4faba482 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -123,12 +123,42 @@ static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
return blk->id * rrpc->dev->pgs_per_blk;
}
+static struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev,
+ struct ppa_addr r)
+{
+ struct ppa_addr l;
+ int secs, pgs, blks, luns;
+ sector_t ppa = r.ppa;
+
+ l.ppa = 0;
+
+ div_u64_rem(ppa, dev->sec_per_pg, &secs);
+ l.g.sec = secs;
+
+ sector_div(ppa, dev->sec_per_pg);
+ div_u64_rem(ppa, dev->sec_per_blk, &pgs);
+ l.g.pg = pgs;
+
+ sector_div(ppa, dev->pgs_per_blk);
+ div_u64_rem(ppa, dev->blks_per_lun, &blks);
+ l.g.blk = blks;
+
+ sector_div(ppa, dev->blks_per_lun);
+ div_u64_rem(ppa, dev->luns_per_chnl, &luns);
+ l.g.lun = luns;
+
+ sector_div(ppa, dev->luns_per_chnl);
+ l.g.ch = ppa;
+
+ return l;
+}
+
static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_dev *dev, u64 addr)
{
struct ppa_addr paddr;
paddr.ppa = addr;
- return __linear_to_generic_addr(dev, paddr);
+ return linear_to_generic_addr(dev, paddr);
}
/* requires lun->lock taken */
@@ -152,7 +182,7 @@ static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
struct nvm_block *blk;
struct rrpc_block *rblk;
- blk = nvm_get_blk(rrpc->dev, rlun->parent, 0);
+ blk = nvm_get_blk(rrpc->dev, rlun->parent, flags);
if (!blk)
return NULL;
@@ -172,6 +202,20 @@ static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk)
nvm_put_blk(rrpc->dev, rblk->parent);
}
+static void rrpc_put_blks(struct rrpc *rrpc)
+{
+ struct rrpc_lun *rlun;
+ int i;
+
+ for (i = 0; i < rrpc->nr_luns; i++) {
+ rlun = &rrpc->luns[i];
+ if (rlun->cur)
+ rrpc_put_blk(rrpc, rlun->cur);
+ if (rlun->gc_cur)
+ rrpc_put_blk(rrpc, rlun->gc_cur);
+ }
+}
+
static struct rrpc_lun *get_next_lun(struct rrpc *rrpc)
{
int next = atomic_inc_return(&rrpc->next_lun);
@@ -972,7 +1016,7 @@ static int rrpc_map_init(struct rrpc *rrpc)
return 0;
/* Bring up the mapping table from device */
- ret = dev->ops->get_l2p_tbl(dev->q, 0, dev->total_pages,
+ ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_pages,
rrpc_l2p_update, rrpc);
if (ret) {
pr_err("nvm: rrpc: could not read L2P table.\n");
@@ -1194,18 +1238,21 @@ static int rrpc_luns_configure(struct rrpc *rrpc)
rblk = rrpc_get_blk(rrpc, rlun, 0);
if (!rblk)
- return -EINVAL;
+ goto err;
rrpc_set_lun_cur(rlun, rblk);
/* Emergency gc block */
rblk = rrpc_get_blk(rrpc, rlun, 1);
if (!rblk)
- return -EINVAL;
+ goto err;
rlun->gc_cur = rblk;
}
return 0;
+err:
+ rrpc_put_blks(rrpc);
+ return -EINVAL;
}
static struct nvm_tgt_type tt_rrpc;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 917d47e290ae..3147c8d09ea8 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -112,7 +112,8 @@ struct iv_tcw_private {
* and encrypts / decrypts at the same time.
*/
enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
- DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD };
+ DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
+ DM_CRYPT_EXIT_THREAD};
/*
* The fields in here must be read only after initialization.
@@ -1203,20 +1204,18 @@ continue_locked:
if (!RB_EMPTY_ROOT(&cc->write_tree))
goto pop_from_list;
+ if (unlikely(test_bit(DM_CRYPT_EXIT_THREAD, &cc->flags))) {
+ spin_unlock_irq(&cc->write_thread_wait.lock);
+ break;
+ }
+
__set_current_state(TASK_INTERRUPTIBLE);
__add_wait_queue(&cc->write_thread_wait, &wait);
spin_unlock_irq(&cc->write_thread_wait.lock);
- if (unlikely(kthread_should_stop())) {
- set_task_state(current, TASK_RUNNING);
- remove_wait_queue(&cc->write_thread_wait, &wait);
- break;
- }
-
schedule();
- set_task_state(current, TASK_RUNNING);
spin_lock_irq(&cc->write_thread_wait.lock);
__remove_wait_queue(&cc->write_thread_wait, &wait);
goto continue_locked;
@@ -1531,8 +1530,13 @@ static void crypt_dtr(struct dm_target *ti)
if (!cc)
return;
- if (cc->write_thread)
+ if (cc->write_thread) {
+ spin_lock_irq(&cc->write_thread_wait.lock);
+ set_bit(DM_CRYPT_EXIT_THREAD, &cc->flags);
+ wake_up_locked(&cc->write_thread_wait);
+ spin_unlock_irq(&cc->write_thread_wait.lock);
kthread_stop(cc->write_thread);
+ }
if (cc->io_queue)
destroy_workqueue(cc->io_queue);
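Reduced to its core, the shutdown handshake introduced above works because the exit bit is set and the sleeper is woken while the waitqueue lock is held, so the worker can never re-check its tree, miss the flag, and go back to sleep before kthread_stop() arrives. In outline (error paths omitted):

/* stopping side, in crypt_dtr() */
spin_lock_irq(&cc->write_thread_wait.lock);
set_bit(DM_CRYPT_EXIT_THREAD, &cc->flags);
wake_up_locked(&cc->write_thread_wait);
spin_unlock_irq(&cc->write_thread_wait.lock);
kthread_stop(cc->write_thread);

/* worker side, with cc->write_thread_wait.lock held */
if (test_bit(DM_CRYPT_EXIT_THREAD, &cc->flags)) {
	spin_unlock_irq(&cc->write_thread_wait.lock);
	break;	/* leave the loop; kthread_stop() reaps the thread */
}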
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index aaa6caa46a9f..cfa29f574c2a 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1537,32 +1537,34 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
struct block_device **bdev, fmode_t *mode)
{
struct multipath *m = ti->private;
- struct pgpath *pgpath;
unsigned long flags;
int r;
- r = 0;
-
spin_lock_irqsave(&m->lock, flags);
if (!m->current_pgpath)
__choose_pgpath(m, 0);
- pgpath = m->current_pgpath;
-
- if (pgpath) {
- *bdev = pgpath->path.dev->bdev;
- *mode = pgpath->path.dev->mode;
+ if (m->current_pgpath) {
+ if (!m->queue_io) {
+ *bdev = m->current_pgpath->path.dev->bdev;
+ *mode = m->current_pgpath->path.dev->mode;
+ r = 0;
+ } else {
+ /* pg_init has not started or completed */
+ r = -ENOTCONN;
+ }
+ } else {
+ /* No path is available */
+ if (m->queue_if_no_path)
+ r = -ENOTCONN;
+ else
+ r = -EIO;
}
- if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path))
- r = -ENOTCONN;
- else if (!*bdev)
- r = -EIO;
-
spin_unlock_irqrestore(&m->lock, flags);
- if (r == -ENOTCONN && !fatal_signal_pending(current)) {
+ if (r == -ENOTCONN) {
spin_lock_irqsave(&m->lock, flags);
if (!m->current_pg) {
/* Path status changed, redo selection */
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 1fa45695b68a..c219a053c7f6 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1207,6 +1207,12 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
dm_block_t held_root;
/*
+ * We commit to ensure the btree roots which we increment in a
+ * moment are up to date.
+ */
+ __commit_transaction(pmd);
+
+ /*
* Copy the superblock.
*/
dm_sm_inc_block(pmd->metadata_sm, THIN_SUPERBLOCK_LOCATION);
@@ -1538,7 +1544,7 @@ static int __remove(struct dm_thin_device *td, dm_block_t block)
static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_t end)
{
int r;
- unsigned count;
+ unsigned count, total_count = 0;
struct dm_pool_metadata *pmd = td->pmd;
dm_block_t keys[1] = { td->id };
__le64 value;
@@ -1561,11 +1567,29 @@ static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_
if (r)
return r;
- r = dm_btree_remove_leaves(&pmd->bl_info, mapping_root, &begin, end, &mapping_root, &count);
- if (r)
- return r;
+ /*
+	 * dm_btree_remove_leaves() stops at the first unmapped entry, so we
+	 * have to loop around finding mapped ranges.
+ */
+ while (begin < end) {
+ r = dm_btree_lookup_next(&pmd->bl_info, mapping_root, &begin, &begin, &value);
+ if (r == -ENODATA)
+ break;
+
+ if (r)
+ return r;
+
+ if (begin >= end)
+ break;
+
+ r = dm_btree_remove_leaves(&pmd->bl_info, mapping_root, &begin, end, &mapping_root, &count);
+ if (r)
+ return r;
+
+ total_count += count;
+ }
- td->mapped_blocks -= count;
+ td->mapped_blocks -= total_count;
td->changed = 1;
/*
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 3897b90bd462..63903a5a5d9e 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2432,6 +2432,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
case PM_WRITE:
if (old_mode != new_mode)
notify_of_pool_mode_change(pool, "write");
+ pool->pf.error_if_no_space = pt->requested_pf.error_if_no_space;
dm_pool_metadata_read_write(pool->pmd);
pool->process_bio = process_bio;
pool->process_discard = process_discard_bio;
@@ -4249,10 +4250,9 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
struct thin_c *tc = ti->private;
struct pool *pool = tc->pool;
- struct queue_limits *pool_limits = dm_get_queue_limits(pool->pool_md);
- if (!pool_limits->discard_granularity)
- return; /* pool's discard support is disabled */
+ if (!pool->pf.discard_enabled)
+ return;
limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
limits->max_discard_sectors = 2048 * 1024 * 16; /* 16G */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6e15f3565892..5df40480228b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -591,7 +591,7 @@ retry:
out:
dm_put_live_table(md, *srcu_idx);
- if (r == -ENOTCONN) {
+ if (r == -ENOTCONN && !fatal_signal_pending(current)) {
msleep(10);
goto retry;
}
@@ -603,9 +603,10 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
{
struct mapped_device *md = bdev->bd_disk->private_data;
struct dm_target *tgt;
+ struct block_device *tgt_bdev = NULL;
int srcu_idx, r;
- r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
+ r = dm_get_live_table_for_ioctl(md, &tgt, &tgt_bdev, &mode, &srcu_idx);
if (r < 0)
return r;
@@ -620,7 +621,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
goto out;
}
- r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
+ r = __blkdev_driver_ioctl(tgt_bdev, mode, cmd, arg);
out:
dm_put_live_table(md, srcu_idx);
return r;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 807095f4c793..61aacab424cf 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -314,8 +314,8 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
*/
void mddev_suspend(struct mddev *mddev)
{
- BUG_ON(mddev->suspended);
- mddev->suspended = 1;
+ if (mddev->suspended++)
+ return;
synchronize_rcu();
wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
mddev->pers->quiesce(mddev, 1);
@@ -326,7 +326,8 @@ EXPORT_SYMBOL_GPL(mddev_suspend);
void mddev_resume(struct mddev *mddev)
{
- mddev->suspended = 0;
+ if (--mddev->suspended)
+ return;
wake_up(&mddev->sb_wait);
mddev->pers->quiesce(mddev, 0);
@@ -1652,7 +1653,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
rdev->journal_tail = le64_to_cpu(sb->journal_tail);
if (mddev->recovery_cp == MaxSector)
set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
- rdev->raid_disk = mddev->raid_disks;
+ rdev->raid_disk = 0;
break;
default:
rdev->saved_raid_disk = role;
@@ -2773,6 +2774,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
/* Activating a spare .. or possibly reactivating
* if we ever get bitmaps working here.
*/
+ int err;
if (rdev->raid_disk != -1)
return -EBUSY;
@@ -2794,9 +2796,15 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
rdev->saved_raid_disk = -1;
clear_bit(In_sync, &rdev->flags);
clear_bit(Bitmap_sync, &rdev->flags);
- remove_and_add_spares(rdev->mddev, rdev);
- if (rdev->raid_disk == -1)
- return -EBUSY;
+ err = rdev->mddev->pers->
+ hot_add_disk(rdev->mddev, rdev);
+ if (err) {
+ rdev->raid_disk = -1;
+ return err;
+ } else
+ sysfs_notify_dirent_safe(rdev->sysfs_state);
+ if (sysfs_link_rdev(rdev->mddev, rdev))
+ /* failure here is OK */;
/* don't wakeup anyone, leave that to userspace. */
} else {
if (slot >= rdev->mddev->raid_disks &&
@@ -4318,8 +4326,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
}
mddev_unlock(mddev);
}
- } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
- test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+ } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return -EBUSY;
else if (cmd_match(page, "resync"))
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4332,8 +4339,12 @@ action_store(struct mddev *mddev, const char *page, size_t len)
return -EINVAL;
err = mddev_lock(mddev);
if (!err) {
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- err = mddev->pers->start_reshape(mddev);
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+ err = -EBUSY;
+ else {
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ err = mddev->pers->start_reshape(mddev);
+ }
mddev_unlock(mddev);
}
if (err)
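Turning mddev->suspended from a flag into a counter makes suspend/resume nestable: only the outermost suspend quiesces the array and only the matching outermost resume restarts it. The idiom in miniature (hypothetical device type; locking and waiting omitted):

void example_suspend(struct example_dev *dev)	/* hypothetical */
{
	if (dev->suspended++)
		return;		/* an outer caller already suspended us */
	/* ... quiesce, wait for in-flight I/O to drain ... */
}

void example_resume(struct example_dev *dev)
{
	if (--dev->suspended)
		return;		/* an outer suspend is still in effect */
	/* ... wake waiters, restart I/O ... */
}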
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 2bea51edfab7..ca0b643fe3c1 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -566,7 +566,9 @@ static inline char * mdname (struct mddev * mddev)
static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
char nm[20];
- if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) {
+ if (!test_bit(Replacement, &rdev->flags) &&
+ !test_bit(Journal, &rdev->flags) &&
+ mddev->kobj.sd) {
sprintf(nm, "rd%d", rdev->raid_disk);
return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
} else
@@ -576,7 +578,9 @@ static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
char nm[20];
- if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) {
+ if (!test_bit(Replacement, &rdev->flags) &&
+ !test_bit(Journal, &rdev->flags) &&
+ mddev->kobj.sd) {
sprintf(nm, "rd%d", rdev->raid_disk);
sysfs_remove_link(&mddev->kobj, nm);
}
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index c573402033b2..b1ced58eb5e1 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -63,6 +63,11 @@ int lower_bound(struct btree_node *n, uint64_t key)
return bsearch(n, key, 0);
}
+static int upper_bound(struct btree_node *n, uint64_t key)
+{
+ return bsearch(n, key, 1);
+}
+
void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
struct dm_btree_value_type *vt)
{
@@ -252,6 +257,16 @@ static void pop_frame(struct del_stack *s)
dm_tm_unlock(s->tm, f->b);
}
+static void unlock_all_frames(struct del_stack *s)
+{
+ struct frame *f;
+
+ while (unprocessed_frames(s)) {
+ f = s->spine + s->top--;
+ dm_tm_unlock(s->tm, f->b);
+ }
+}
+
int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
{
int r;
@@ -308,9 +323,13 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
pop_frame(s);
}
}
-
out:
+ if (r) {
+ /* cleanup all frames of del_stack */
+ unlock_all_frames(s);
+ }
kfree(s);
+
return r;
}
EXPORT_SYMBOL_GPL(dm_btree_del);
@@ -392,6 +411,82 @@ int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
}
EXPORT_SYMBOL_GPL(dm_btree_lookup);
+static int dm_btree_lookup_next_single(struct dm_btree_info *info, dm_block_t root,
+ uint64_t key, uint64_t *rkey, void *value_le)
+{
+ int r, i;
+ uint32_t flags, nr_entries;
+ struct dm_block *node;
+ struct btree_node *n;
+
+ r = bn_read_lock(info, root, &node);
+ if (r)
+ return r;
+
+ n = dm_block_data(node);
+ flags = le32_to_cpu(n->header.flags);
+ nr_entries = le32_to_cpu(n->header.nr_entries);
+
+ if (flags & INTERNAL_NODE) {
+ i = lower_bound(n, key);
+ if (i < 0 || i >= nr_entries) {
+ r = -ENODATA;
+ goto out;
+ }
+
+ r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le);
+ if (r == -ENODATA && i < (nr_entries - 1)) {
+ i++;
+ r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le);
+ }
+
+ } else {
+ i = upper_bound(n, key);
+ if (i < 0 || i >= nr_entries) {
+ r = -ENODATA;
+ goto out;
+ }
+
+ *rkey = le64_to_cpu(n->keys[i]);
+ memcpy(value_le, value_ptr(n, i), info->value_type.size);
+ }
+out:
+ dm_tm_unlock(info->tm, node);
+ return r;
+}
+
+int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, uint64_t *rkey, void *value_le)
+{
+ unsigned level;
+ int r = -ENODATA;
+ __le64 internal_value_le;
+ struct ro_spine spine;
+
+ init_ro_spine(&spine, info);
+ for (level = 0; level < info->levels - 1u; level++) {
+ r = btree_lookup_raw(&spine, root, keys[level],
+ lower_bound, rkey,
+ &internal_value_le, sizeof(uint64_t));
+ if (r)
+ goto out;
+
+ if (*rkey != keys[level]) {
+ r = -ENODATA;
+ goto out;
+ }
+
+ root = le64_to_cpu(internal_value_le);
+ }
+
+ r = dm_btree_lookup_next_single(info, root, keys[level], rkey, value_le);
+out:
+ exit_ro_spine(&spine);
+ return r;
+}
+
+EXPORT_SYMBOL_GPL(dm_btree_lookup_next);
+
/*
* Splits a node by creating a sibling node and shifting half the nodes
* contents across. Assumes there is a parent node, and it has room for
@@ -473,8 +568,10 @@ static int btree_split_sibling(struct shadow_spine *s, unsigned parent_index,
r = insert_at(sizeof(__le64), pn, parent_index + 1,
le64_to_cpu(rn->keys[0]), &location);
- if (r)
+ if (r) {
+ unlock_block(s->info, right);
return r;
+ }
if (key < le64_to_cpu(rn->keys[0])) {
unlock_block(s->info, right);
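The caller-side shape this new lookup enables is the skip-scan loop already shown in __remove_range() above: jump to the next mapped key, remove the contiguous mapped run that starts there, and repeat until the range is exhausted. In outline (single-level btree assumed, as in the thin-metadata caller):

while (begin < end) {
	r = dm_btree_lookup_next(info, root, &begin, &begin, &value);
	if (r == -ENODATA)
		break;			/* nothing mapped at or after begin */
	if (r)
		return r;
	if (begin >= end)
		break;

	r = dm_btree_remove_leaves(info, root, &begin, end, &root, &count);
	if (r)
		return r;
	total_count += count;
}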
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index 11d8cf78621d..c74301fa5a37 100644
--- a/drivers/md/persistent-data/dm-btree.h
+++ b/drivers/md/persistent-data/dm-btree.h
@@ -110,6 +110,13 @@ int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
uint64_t *keys, void *value_le);
/*
+ * Tries to find the first key where the bottom level key is >= the one
+ * given. Useful for skipping empty sections of the btree.
+ */
+int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, uint64_t *rkey, void *value_le);
+
+/*
* Insertion (or overwrite an existing value). O(ln(n))
*/
int dm_btree_insert(struct dm_btree_info *info, dm_block_t root,
@@ -135,9 +142,10 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
uint64_t *keys, dm_block_t *new_root);
/*
- * Removes values between 'keys' and keys2, where keys2 is keys with the
- * final key replaced with 'end_key'. 'end_key' is the one-past-the-end
- * value. 'keys' may be altered.
+ * Removes a _contiguous_ run of values starting from 'keys' and not
+ * reaching keys2 (where keys2 is keys with the final key replaced with
+ * 'end_key'). 'end_key' is the one-past-the-end value. 'keys' may be
+ * altered.
*/
int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root,
uint64_t *keys, uint64_t end_key,
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index 53091295fce9..fca6dbcf9a47 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -136,7 +136,7 @@ static int brb_push(struct bop_ring_buffer *brb,
return 0;
}
-static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result)
+static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
{
struct block_op *bop;
@@ -147,6 +147,17 @@ static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result)
result->type = bop->type;
result->block = bop->block;
+ return 0;
+}
+
+static int brb_pop(struct bop_ring_buffer *brb)
+{
+ struct block_op *bop;
+
+ if (brb_empty(brb))
+ return -ENODATA;
+
+ bop = brb->bops + brb->begin;
brb->begin = brb_next(brb, brb->begin);
return 0;
@@ -211,7 +222,7 @@ static int apply_bops(struct sm_metadata *smm)
while (!brb_empty(&smm->uncommitted)) {
struct block_op bop;
- r = brb_pop(&smm->uncommitted, &bop);
+ r = brb_peek(&smm->uncommitted, &bop);
if (r) {
DMERR("bug in bop ring buffer");
break;
@@ -220,6 +231,8 @@ static int apply_bops(struct sm_metadata *smm)
r = commit_bop(smm, &bop);
if (r)
break;
+
+ brb_pop(&smm->uncommitted);
}
return r;
@@ -683,7 +696,6 @@ static struct dm_space_map bootstrap_ops = {
static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
int r, i;
- enum allocation_event ev;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
dm_block_t old_len = smm->ll.nr_blocks;
@@ -705,11 +717,12 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
* allocate any new blocks.
*/
do {
- for (i = old_len; !r && i < smm->begin; i++) {
- r = sm_ll_inc(&smm->ll, i, &ev);
- if (r)
- goto out;
- }
+ for (i = old_len; !r && i < smm->begin; i++)
+ r = add_bop(smm, BOP_INC, i);
+
+ if (r)
+ goto out;
+
old_len = smm->begin;
r = apply_bops(smm);
@@ -754,7 +767,6 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
{
int r;
dm_block_t i;
- enum allocation_event ev;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
smm->begin = superblock + 1;
@@ -782,7 +794,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
* allocated blocks that they were built from.
*/
for (i = superblock; !r && i < smm->begin; i++)
- r = sm_ll_inc(&smm->ll, i, &ev);
+ r = add_bop(smm, BOP_INC, i);
if (r)
return r;
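The brb_peek()/brb_pop() split above is the usual "consume only after success" queue idiom: nothing is dequeued until its side effect has been committed, so a failed commit_bop() leaves the operation in the ring for a later attempt instead of silently dropping it. As a generic sketch (hypothetical ring helpers):

while (!ring_empty(ring)) {
	struct op op;

	if (ring_peek(ring, &op))	/* head stays queued */
		break;
	if (apply(&op))			/* failure: op is still queued */
		break;
	ring_pop(ring);			/* success: consume it */
}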
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 41d70bc9ba2f..84e597e1c489 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1946,6 +1946,8 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
first = i;
fbio = r10_bio->devs[i].bio;
+ fbio->bi_iter.bi_size = r10_bio->sectors << 9;
+ fbio->bi_iter.bi_idx = 0;
vcnt = (r10_bio->sectors + (PAGE_SIZE >> 9) - 1) >> (PAGE_SHIFT - 9);
/* now find blocks with errors */
@@ -1989,7 +1991,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
bio_reset(tbio);
tbio->bi_vcnt = vcnt;
- tbio->bi_iter.bi_size = r10_bio->sectors << 9;
+ tbio->bi_iter.bi_size = fbio->bi_iter.bi_size;
tbio->bi_rw = WRITE;
tbio->bi_private = r10_bio;
tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c
index 35759a91d47d..e8f847226a19 100644
--- a/drivers/media/pci/cx23885/cx23885-core.c
+++ b/drivers/media/pci/cx23885/cx23885-core.c
@@ -1992,9 +1992,9 @@ static int cx23885_initdev(struct pci_dev *pci_dev,
(unsigned long long)pci_resource_start(pci_dev, 0));
pci_set_master(pci_dev);
- if (!pci_set_dma_mask(pci_dev, 0xffffffff)) {
+ err = pci_set_dma_mask(pci_dev, 0xffffffff);
+ if (err) {
printk("%s/0: Oops: no 32bit PCI DMA ???\n", dev->name);
- err = -EIO;
goto fail_context;
}
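This and the following media-driver hunks all correct the same inverted test: pci_set_dma_mask() returns 0 on success and a negative errno on failure, so "if (!pci_set_dma_mask(...))" took the error path precisely when the mask had been set. The corrected shape, sketched in isolation:

err = pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32));
if (err) {
	dev_err(&pci_dev->dev, "no suitable 32-bit DMA mask\n");
	goto fail;	/* propagate err rather than hard-coding -EIO */
}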
diff --git a/drivers/media/pci/cx25821/cx25821-core.c b/drivers/media/pci/cx25821/cx25821-core.c
index dbc695f32760..0042803a9de7 100644
--- a/drivers/media/pci/cx25821/cx25821-core.c
+++ b/drivers/media/pci/cx25821/cx25821-core.c
@@ -1319,7 +1319,8 @@ static int cx25821_initdev(struct pci_dev *pci_dev,
dev->pci_lat, (unsigned long long)dev->base_io_addr);
pci_set_master(pci_dev);
- if (!pci_set_dma_mask(pci_dev, 0xffffffff)) {
+ err = pci_set_dma_mask(pci_dev, 0xffffffff);
+ if (err) {
pr_err("%s/0: Oops: no 32bit PCI DMA ???\n", dev->name);
err = -EIO;
goto fail_irq;
diff --git a/drivers/media/pci/cx88/cx88-alsa.c b/drivers/media/pci/cx88/cx88-alsa.c
index 0ed1b6530374..1b5268f9bb24 100644
--- a/drivers/media/pci/cx88/cx88-alsa.c
+++ b/drivers/media/pci/cx88/cx88-alsa.c
@@ -890,9 +890,9 @@ static int snd_cx88_create(struct snd_card *card, struct pci_dev *pci,
return err;
}
- if (!pci_set_dma_mask(pci,DMA_BIT_MASK(32))) {
+ err = pci_set_dma_mask(pci,DMA_BIT_MASK(32));
+ if (err) {
dprintk(0, "%s/1: Oops: no 32bit PCI DMA ???\n",core->name);
- err = -EIO;
cx88_core_put(core, pci);
return err;
}
diff --git a/drivers/media/pci/cx88/cx88-mpeg.c b/drivers/media/pci/cx88/cx88-mpeg.c
index 9db7767d1fe0..f34c229f9b37 100644
--- a/drivers/media/pci/cx88/cx88-mpeg.c
+++ b/drivers/media/pci/cx88/cx88-mpeg.c
@@ -393,7 +393,8 @@ static int cx8802_init_common(struct cx8802_dev *dev)
if (pci_enable_device(dev->pci))
return -EIO;
pci_set_master(dev->pci);
- if (!pci_set_dma_mask(dev->pci,DMA_BIT_MASK(32))) {
+ err = pci_set_dma_mask(dev->pci,DMA_BIT_MASK(32));
+ if (err) {
printk("%s/2: Oops: no 32bit PCI DMA ???\n",dev->core->name);
return -EIO;
}
diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c
index 0de1ad5a977d..aef9acf351f6 100644
--- a/drivers/media/pci/cx88/cx88-video.c
+++ b/drivers/media/pci/cx88/cx88-video.c
@@ -1314,9 +1314,9 @@ static int cx8800_initdev(struct pci_dev *pci_dev,
dev->pci_lat,(unsigned long long)pci_resource_start(pci_dev,0));
pci_set_master(pci_dev);
- if (!pci_set_dma_mask(pci_dev,DMA_BIT_MASK(32))) {
+ err = pci_set_dma_mask(pci_dev,DMA_BIT_MASK(32));
+ if (err) {
printk("%s/0: Oops: no 32bit PCI DMA ???\n",core->name);
- err = -EIO;
goto fail_core;
}
dev->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev);
diff --git a/drivers/media/pci/ivtv/ivtv-driver.c b/drivers/media/pci/ivtv/ivtv-driver.c
index 8616fa8193bc..c2e60b4f292d 100644
--- a/drivers/media/pci/ivtv/ivtv-driver.c
+++ b/drivers/media/pci/ivtv/ivtv-driver.c
@@ -805,11 +805,11 @@ static void ivtv_init_struct2(struct ivtv *itv)
{
int i;
- for (i = 0; i < IVTV_CARD_MAX_VIDEO_INPUTS - 1; i++)
+ for (i = 0; i < IVTV_CARD_MAX_VIDEO_INPUTS; i++)
if (itv->card->video_inputs[i].video_type == 0)
break;
itv->nof_inputs = i;
- for (i = 0; i < IVTV_CARD_MAX_AUDIO_INPUTS - 1; i++)
+ for (i = 0; i < IVTV_CARD_MAX_AUDIO_INPUTS; i++)
if (itv->card->audio_inputs[i].audio_type == 0)
break;
itv->nof_audio_inputs = i;
diff --git a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c
index 60b2d462f98d..3fdbd81b5580 100644
--- a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c
+++ b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c
@@ -810,7 +810,7 @@ static int netup_unidvb_initdev(struct pci_dev *pci_dev,
"%s(): board vendor 0x%x, revision 0x%x\n",
__func__, board_vendor, board_revision);
pci_set_master(pci_dev);
- if (!pci_set_dma_mask(pci_dev, 0xffffffff)) {
+ if (pci_set_dma_mask(pci_dev, 0xffffffff) < 0) {
dev_err(&pci_dev->dev,
"%s(): 32bit PCI DMA is not supported\n", __func__);
goto pci_detect_err;
diff --git a/drivers/media/pci/saa7134/saa7134-core.c b/drivers/media/pci/saa7134/saa7134-core.c
index e79d63eb774e..f720cea80e28 100644
--- a/drivers/media/pci/saa7134/saa7134-core.c
+++ b/drivers/media/pci/saa7134/saa7134-core.c
@@ -951,9 +951,9 @@ static int saa7134_initdev(struct pci_dev *pci_dev,
pci_name(pci_dev), dev->pci_rev, pci_dev->irq,
dev->pci_lat,(unsigned long long)pci_resource_start(pci_dev,0));
pci_set_master(pci_dev);
- if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) {
+ err = pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32));
+ if (err) {
pr_warn("%s: Oops: no 32bit PCI DMA ???\n", dev->name);
- err = -EIO;
goto fail1;
}
diff --git a/drivers/media/pci/saa7164/saa7164-core.c b/drivers/media/pci/saa7164/saa7164-core.c
index 8f36b48ef733..8bbd092fbe1d 100644
--- a/drivers/media/pci/saa7164/saa7164-core.c
+++ b/drivers/media/pci/saa7164/saa7164-core.c
@@ -1264,9 +1264,9 @@ static int saa7164_initdev(struct pci_dev *pci_dev,
pci_set_master(pci_dev);
/* TODO */
- if (!pci_set_dma_mask(pci_dev, 0xffffffff)) {
+ err = pci_set_dma_mask(pci_dev, 0xffffffff);
+ if (err) {
printk("%s/0: Oops: no 32bit PCI DMA ???\n", dev->name);
- err = -EIO;
goto fail_irq;
}
diff --git a/drivers/media/pci/tw68/tw68-core.c b/drivers/media/pci/tw68/tw68-core.c
index 8c5655d351d3..4e77618fbb2b 100644
--- a/drivers/media/pci/tw68/tw68-core.c
+++ b/drivers/media/pci/tw68/tw68-core.c
@@ -257,9 +257,9 @@ static int tw68_initdev(struct pci_dev *pci_dev,
dev->name, pci_name(pci_dev), dev->pci_rev, pci_dev->irq,
dev->pci_lat, (u64)pci_resource_start(pci_dev, 0));
pci_set_master(pci_dev);
- if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) {
+ err = pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32));
+ if (err) {
pr_info("%s: Oops: no 32bit PCI DMA ???\n", dev->name);
- err = -EIO;
goto fail1;
}
diff --git a/drivers/media/usb/airspy/airspy.c b/drivers/media/usb/airspy/airspy.c
index fcbb49757614..565a59310747 100644
--- a/drivers/media/usb/airspy/airspy.c
+++ b/drivers/media/usb/airspy/airspy.c
@@ -134,7 +134,7 @@ struct airspy {
int urbs_submitted;
/* USB control message buffer */
- #define BUF_SIZE 24
+ #define BUF_SIZE 128
u8 buf[BUF_SIZE];
/* Current configuration */
diff --git a/drivers/media/usb/hackrf/hackrf.c b/drivers/media/usb/hackrf/hackrf.c
index e05bfec90f46..0fe5cb2c260c 100644
--- a/drivers/media/usb/hackrf/hackrf.c
+++ b/drivers/media/usb/hackrf/hackrf.c
@@ -24,6 +24,15 @@
#include <media/videobuf2-v4l2.h>
#include <media/videobuf2-vmalloc.h>
+/*
+ * The Avago MGA-81563 RF amplifier used here can be destroyed quite easily
+ * by too strong a signal or by transmitting into a bad antenna.
+ * To be safe, set the RF gain control to the 'grabbed' state by default.
+ */
+static bool hackrf_enable_rf_gain_ctrl;
+module_param_named(enable_rf_gain_ctrl, hackrf_enable_rf_gain_ctrl, bool, 0644);
+MODULE_PARM_DESC(enable_rf_gain_ctrl, "enable RX/TX RF amplifier control (warn: could damage amplifier)");
+
/* HackRF USB API commands (from HackRF Library) */
enum {
CMD_SET_TRANSCEIVER_MODE = 0x01,
@@ -1451,6 +1460,7 @@ static int hackrf_probe(struct usb_interface *intf,
dev_err(dev->dev, "Could not initialize controls\n");
goto err_v4l2_ctrl_handler_free_rx;
}
+ v4l2_ctrl_grab(dev->rx_rf_gain, !hackrf_enable_rf_gain_ctrl);
v4l2_ctrl_handler_setup(&dev->rx_ctrl_handler);
/* Register controls for transmitter */
@@ -1471,6 +1481,7 @@ static int hackrf_probe(struct usb_interface *intf,
dev_err(dev->dev, "Could not initialize controls\n");
goto err_v4l2_ctrl_handler_free_tx;
}
+ v4l2_ctrl_grab(dev->tx_rf_gain, !hackrf_enable_rf_gain_ctrl);
v4l2_ctrl_handler_setup(&dev->tx_ctrl_handler);
/* Register the v4l2_device structure */
@@ -1530,7 +1541,7 @@ err_v4l2_ctrl_handler_free_rx:
err_kfree:
kfree(dev);
err:
- dev_dbg(dev->dev, "failed=%d\n", ret);
+ dev_dbg(&intf->dev, "failed=%d\n", ret);
return ret;
}
diff --git a/drivers/memory/fsl_ifc.c b/drivers/memory/fsl_ifc.c
index e87459f6d686..acd1460cf787 100644
--- a/drivers/memory/fsl_ifc.c
+++ b/drivers/memory/fsl_ifc.c
@@ -22,6 +22,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/compiler.h>
+#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/slab.h>
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index d2e75c88f4d2..f40909793490 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -497,6 +497,7 @@ static u64 calculate_sr(struct cxl_context *ctx)
{
u64 sr = 0;
+ set_endian(sr);
if (ctx->master)
sr |= CXL_PSL_SR_An_MP;
if (mfspr(SPRN_LPCR) & LPCR_TC)
@@ -506,7 +507,6 @@ static u64 calculate_sr(struct cxl_context *ctx)
sr |= CXL_PSL_SR_An_HV;
} else {
sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R;
- set_endian(sr);
sr &= ~(CXL_PSL_SR_An_HV);
if (!test_tsk_thread_flag(current, TIF_32BIT))
sr |= CXL_PSL_SR_An_SF;
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 23b6c8e8701c..d8486168415a 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -65,8 +65,7 @@ MODULE_ALIAS("mmc:block");
#define MMC_SANITIZE_REQ_TIMEOUT 240000
#define MMC_EXTRACT_INDEX_FROM_ARG(x) ((x & 0x00FF0000) >> 16)
-#define mmc_req_rel_wr(req) (((req->cmd_flags & REQ_FUA) || \
- (req->cmd_flags & REQ_META)) && \
+#define mmc_req_rel_wr(req) ((req->cmd_flags & REQ_FUA) && \
(rq_data_dir(req) == WRITE))
#define PACKED_CMD_VER 0x01
#define PACKED_CMD_WR 0x02
@@ -1467,13 +1466,9 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
/*
* Reliable writes are used to implement Forced Unit Access and
- * REQ_META accesses, and are supported only on MMCs.
- *
- * XXX: this really needs a good explanation of why REQ_META
- * is treated special.
+ * are supported only on MMCs.
*/
- bool do_rel_wr = ((req->cmd_flags & REQ_FUA) ||
- (req->cmd_flags & REQ_META)) &&
+ bool do_rel_wr = (req->cmd_flags & REQ_FUA) &&
(rq_data_dir(req) == WRITE) &&
(md->flags & MMC_BLK_REL_WR);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index c793fda27321..3a9a79ec4343 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1040,9 +1040,24 @@ static int mmc_select_hs_ddr(struct mmc_card *card)
return err;
}
+/* Caller must hold re-tuning */
+static int mmc_switch_status(struct mmc_card *card)
+{
+ u32 status;
+ int err;
+
+ err = mmc_send_status(card, &status);
+ if (err)
+ return err;
+
+ return mmc_switch_status_error(card->host, status);
+}
+
static int mmc_select_hs400(struct mmc_card *card)
{
struct mmc_host *host = card->host;
+ bool send_status = true;
+ unsigned int max_dtr;
int err = 0;
u8 val;
@@ -1053,25 +1068,36 @@ static int mmc_select_hs400(struct mmc_card *card)
host->ios.bus_width == MMC_BUS_WIDTH_8))
return 0;
- /*
- * Before switching to dual data rate operation for HS400,
- * it is required to convert from HS200 mode to HS mode.
- */
- mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
- mmc_set_bus_speed(card);
+ if (host->caps & MMC_CAP_WAIT_WHILE_BUSY)
+ send_status = false;
+ /* Reduce frequency to HS frequency */
+ max_dtr = card->ext_csd.hs_max_dtr;
+ mmc_set_clock(host, max_dtr);
+
+ /* Switch card to HS mode */
val = EXT_CSD_TIMING_HS |
card->drive_strength << EXT_CSD_DRV_STR_SHIFT;
err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
EXT_CSD_HS_TIMING, val,
card->ext_csd.generic_cmd6_time,
- true, true, true);
+ true, send_status, true);
if (err) {
pr_err("%s: switch to high-speed from hs200 failed, err:%d\n",
mmc_hostname(host), err);
return err;
}
+ /* Set host controller to HS timing */
+ mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+
+ if (!send_status) {
+ err = mmc_switch_status(card);
+ if (err)
+ goto out_err;
+ }
+
+ /* Switch card to DDR */
err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
EXT_CSD_BUS_WIDTH,
EXT_CSD_DDR_BUS_WIDTH_8,
@@ -1082,22 +1108,35 @@ static int mmc_select_hs400(struct mmc_card *card)
return err;
}
+ /* Switch card to HS400 */
val = EXT_CSD_TIMING_HS400 |
card->drive_strength << EXT_CSD_DRV_STR_SHIFT;
err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
EXT_CSD_HS_TIMING, val,
card->ext_csd.generic_cmd6_time,
- true, true, true);
+ true, send_status, true);
if (err) {
pr_err("%s: switch to hs400 failed, err:%d\n",
mmc_hostname(host), err);
return err;
}
+ /* Set host controller to HS400 timing and frequency */
mmc_set_timing(host, MMC_TIMING_MMC_HS400);
mmc_set_bus_speed(card);
+ if (!send_status) {
+ err = mmc_switch_status(card);
+ if (err)
+ goto out_err;
+ }
+
return 0;
+
+out_err:
+ pr_err("%s: %s failed, error %d\n", mmc_hostname(card->host),
+ __func__, err);
+ return err;
}
int mmc_hs200_to_hs400(struct mmc_card *card)
@@ -1105,19 +1144,6 @@ int mmc_hs200_to_hs400(struct mmc_card *card)
return mmc_select_hs400(card);
}
-/* Caller must hold re-tuning */
-static int mmc_switch_status(struct mmc_card *card)
-{
- u32 status;
- int err;
-
- err = mmc_send_status(card, &status);
- if (err)
- return err;
-
- return mmc_switch_status_error(card->host, status);
-}
-
int mmc_hs400_to_hs200(struct mmc_card *card)
{
struct mmc_host *host = card->host;
@@ -1219,6 +1245,8 @@ static void mmc_select_driver_type(struct mmc_card *card)
static int mmc_select_hs200(struct mmc_card *card)
{
struct mmc_host *host = card->host;
+ bool send_status = true;
+ unsigned int old_timing;
int err = -EINVAL;
u8 val;
@@ -1234,6 +1262,9 @@ static int mmc_select_hs200(struct mmc_card *card)
mmc_select_driver_type(card);
+ if (host->caps & MMC_CAP_WAIT_WHILE_BUSY)
+ send_status = false;
+
/*
* Set the bus width(4 or 8) with host's support and
* switch to HS200 mode if bus width is set successfully.
@@ -1245,11 +1276,25 @@ static int mmc_select_hs200(struct mmc_card *card)
err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
EXT_CSD_HS_TIMING, val,
card->ext_csd.generic_cmd6_time,
- true, true, true);
- if (!err)
- mmc_set_timing(host, MMC_TIMING_MMC_HS200);
+ true, send_status, true);
+ if (err)
+ goto err;
+ old_timing = host->ios.timing;
+ mmc_set_timing(host, MMC_TIMING_MMC_HS200);
+ if (!send_status) {
+ err = mmc_switch_status(card);
+ /*
+ * mmc_select_timing() assumes timing has not changed if
+ * it is a switch error.
+ */
+ if (err == -EBADMSG)
+ mmc_set_timing(host, old_timing);
+ }
}
err:
+ if (err)
+ pr_err("%s: %s failed, error %d\n", mmc_hostname(card->host),
+ __func__, err);
return err;
}
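[Note: the common thread in the HS400 and HS200 hunks above: when the host lacks MMC_CAP_WAIT_WHILE_BUSY, __mmc_switch() is told not to poll with CMD13 (send_status = false), and mmc_switch_status() is called manually only after the host timing has been updated, so the status command goes out at a timing the card can actually answer. A sketch of that ordering, with the function pointers standing in for __mmc_switch, mmc_set_timing and mmc_switch_status:

#include <stdbool.h>

struct sw_ctx { bool host_waits_busy; };

static int switch_then_verify(struct sw_ctx *c,
			      int (*do_switch)(struct sw_ctx *),
			      void (*set_timing)(struct sw_ctx *),
			      int (*check_status)(struct sw_ctx *))
{
	int err = do_switch(c);          /* CMD6 without CMD13 polling */

	if (err)
		return err;
	set_timing(c);                   /* host timing changes first */
	if (!c->host_waits_busy)
		err = check_status(c);   /* then CMD13 at the new timing */
	return err;
}
]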
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index af71de5fda3b..1dee533634c9 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -473,6 +473,7 @@ config MMC_DAVINCI
config MMC_GOLDFISH
tristate "goldfish qemu Multimedia Card Interface support"
+ depends on HAS_DMA
depends on GOLDFISH || COMPILE_TEST
help
This selects the Goldfish Multimedia card Interface emulation
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 39568cc29a2a..33dfd7e72516 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -1276,7 +1276,7 @@ static struct msdc_delay_phase get_best_delay(struct msdc_host *host, u32 delay)
int start = 0, len = 0;
int start_final = 0, len_final = 0;
u8 final_phase = 0xff;
- struct msdc_delay_phase delay_phase;
+ struct msdc_delay_phase delay_phase = { 0, };
if (delay == 0) {
dev_err(host->dev, "phase error: [map:%x]\n", delay);
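[Note: get_best_delay() returns the struct by value, and on the delay == 0 early path the old code handed back a stack object with several members never written. Zero-initializing at the declaration gives every field a defined value on all return paths; a minimal illustration:

struct delay_phase_sketch { int maxlen; int start; int final_phase; };

static struct delay_phase_sketch get_best_delay_sketch(unsigned int map)
{
	struct delay_phase_sketch dp = { 0 };  /* all members defined up front */

	if (map == 0)
		return dp;                     /* early return is now safe */
	dp.final_phase = 1;                    /* window search elided */
	return dp;
}
]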
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index 8cadd74e8407..ce08896b9d69 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -805,7 +805,7 @@ static int pxamci_probe(struct platform_device *pdev)
goto out;
} else {
mmc->caps |= host->pdata->gpio_card_ro_invert ?
- MMC_CAP2_RO_ACTIVE_HIGH : 0;
+ 0 : MMC_CAP2_RO_ACTIVE_HIGH;
}
if (gpio_is_valid(gpio_cd))
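[Note: per this fix the ternary arms were simply swapped: MMC_CAP2_RO_ACTIVE_HIGH must be set when the platform does not invert the read-only GPIO, and left clear when it does. Restated with a placeholder for the capability flag:

#include <stdint.h>

#define RO_ACTIVE_HIGH_SKETCH (1u << 0) /* placeholder for MMC_CAP2_RO_ACTIVE_HIGH */

static uint32_t ro_caps(int gpio_card_ro_invert)
{
	/* arms swapped by the fix: non-inverted lines are active-high */
	return gpio_card_ro_invert ? 0 : RO_ACTIVE_HIGH_SKETCH;
}
]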
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 95c13b2ffa79..ffa288474820 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -426,15 +426,6 @@ int add_mtd_device(struct mtd_info *mtd)
mtd->erasesize_mask = (1 << mtd->erasesize_shift) - 1;
mtd->writesize_mask = (1 << mtd->writesize_shift) - 1;
- if (mtd->dev.parent) {
- if (!mtd->owner && mtd->dev.parent->driver)
- mtd->owner = mtd->dev.parent->driver->owner;
- if (!mtd->name)
- mtd->name = dev_name(mtd->dev.parent);
- } else {
- pr_debug("mtd device won't show a device symlink in sysfs\n");
- }
-
/* Some chips always power up locked. Unlock them now */
if ((mtd->flags & MTD_WRITEABLE) && (mtd->flags & MTD_POWERUP_LOCK)) {
error = mtd_unlock(mtd, 0, mtd->size);
@@ -549,6 +540,21 @@ static int mtd_add_device_partitions(struct mtd_info *mtd,
return 0;
}
+/*
+ * Set a few defaults based on the parent devices, if not provided by the
+ * driver
+ */
+static void mtd_set_dev_defaults(struct mtd_info *mtd)
+{
+ if (mtd->dev.parent) {
+ if (!mtd->owner && mtd->dev.parent->driver)
+ mtd->owner = mtd->dev.parent->driver->owner;
+ if (!mtd->name)
+ mtd->name = dev_name(mtd->dev.parent);
+ } else {
+ pr_debug("mtd device won't show a device symlink in sysfs\n");
+ }
+}
/**
* mtd_device_parse_register - parse partitions and register an MTD device.
@@ -587,6 +593,8 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types,
int ret;
struct mtd_partition *real_parts = NULL;
+ mtd_set_dev_defaults(mtd);
+
ret = parse_mtd_partitions(mtd, types, &real_parts, parser_data);
if (ret <= 0 && nr_parts && parts) {
real_parts = kmemdup(parts, sizeof(*parts) * nr_parts,
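[Note: the owner/name defaults used to be filled in by add_mtd_device(), i.e. after partition parsing had already run; factoring them into mtd_set_dev_defaults() and calling it at the top of mtd_device_parse_register() makes mtd->name available to the parsers. The resulting call order, sketched with stand-ins for the three helpers:

struct mtd_sketch { const char *name; void *owner; };

static int parse_register_order(struct mtd_sketch *mtd,
				void (*set_defaults)(struct mtd_sketch *),
				int (*parse_partitions)(struct mtd_sketch *),
				int (*add_device)(struct mtd_sketch *))
{
	set_defaults(mtd);             /* name/owner ready before parsing */
	if (parse_partitions(mtd) < 0)
		return -1;
	return add_device(mtd);        /* no longer responsible for defaults */
}
]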
diff --git a/drivers/mtd/nand/jz4740_nand.c b/drivers/mtd/nand/jz4740_nand.c
index dc4e8446f1ff..5a99a93ed025 100644
--- a/drivers/mtd/nand/jz4740_nand.c
+++ b/drivers/mtd/nand/jz4740_nand.c
@@ -25,6 +25,7 @@
#include <linux/gpio.h>
+#include <asm/mach-jz4740/gpio.h>
#include <asm/mach-jz4740/jz4740_nand.h>
#define JZ_REG_NAND_CTRL 0x50
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index cc74142938b0..ece544efccc3 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3110,7 +3110,7 @@ static void nand_resume(struct mtd_info *mtd)
*/
static void nand_shutdown(struct mtd_info *mtd)
{
- nand_get_device(mtd, FL_SHUTDOWN);
+ nand_get_device(mtd, FL_PM_SUSPENDED);
}
/* Set default functions */
diff --git a/drivers/mtd/ofpart.c b/drivers/mtd/ofpart.c
index 669c3452f278..9ed6038e47d2 100644
--- a/drivers/mtd/ofpart.c
+++ b/drivers/mtd/ofpart.c
@@ -46,10 +46,18 @@ static int parse_ofpart_partitions(struct mtd_info *master,
ofpart_node = of_get_child_by_name(mtd_node, "partitions");
if (!ofpart_node) {
- pr_warn("%s: 'partitions' subnode not found on %s. Trying to parse direct subnodes as partitions.\n",
- master->name, mtd_node->full_name);
+ /*
+ * We might get here even when ofpart isn't used at all (e.g.,
+ * when using another parser), so don't be louder than
+ * KERN_DEBUG
+ */
+ pr_debug("%s: 'partitions' subnode not found on %s. Trying to parse direct subnodes as partitions.\n",
+ master->name, mtd_node->full_name);
ofpart_node = mtd_node;
dedicated = false;
+ } else if (!of_device_is_compatible(ofpart_node, "fixed-partitions")) {
+ /* The 'partitions' subnode might be used by another parser */
+ return 0;
}
/* First count the subnodes */
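[Note: two behavioural tweaks here: the missing-subnode message is demoted to pr_debug() because another parser may legitimately be in use, and a "partitions" subnode is only claimed when it is compatible with "fixed-partitions", leaving differently-compatible subnodes to other parsers. A hedged sketch of the claim test, where node_compatible is a hypothetical accessor result and NULL means the legacy direct-subnode layout:

#include <stdbool.h>
#include <string.h>

static bool ofpart_claims_node(const char *node_compatible)
{
	if (!node_compatible)
		return true;  /* legacy layout: direct subnodes of the mtd node */
	return strcmp(node_compatible, "fixed-partitions") == 0;
}
]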
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 49883905a434..32477c4eb421 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -516,8 +516,8 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len)
status_old = read_sr(nor);
/* Cannot unlock; would unlock larger region than requested */
- if (stm_is_locked_sr(nor, status_old, ofs - mtd->erasesize,
- mtd->erasesize))
+ if (stm_is_locked_sr(nor, ofs - mtd->erasesize, mtd->erasesize,
+ status_old))
return -EINVAL;
/*
@@ -1200,8 +1200,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
if (JEDEC_MFR(info) == SNOR_MFR_ATMEL ||
JEDEC_MFR(info) == SNOR_MFR_INTEL ||
- JEDEC_MFR(info) == SNOR_MFR_SST ||
- JEDEC_MFR(info) == SNOR_MFR_WINBOND) {
+ JEDEC_MFR(info) == SNOR_MFR_SST) {
write_enable(nor);
write_sr(nor, 0);
}
@@ -1217,8 +1216,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
mtd->_read = spi_nor_read;
/* NOR protection support for STmicro/Micron chips and similar */
- if (JEDEC_MFR(info) == SNOR_MFR_MICRON ||
- JEDEC_MFR(info) == SNOR_MFR_WINBOND) {
+ if (JEDEC_MFR(info) == SNOR_MFR_MICRON) {
nor->flash_lock = stm_lock;
nor->flash_unlock = stm_unlock;
nor->flash_is_locked = stm_is_locked;
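[Note: the first hunk fixes a shifted argument list: stm_is_locked_sr() takes (nor, ofs, len, sr), but the call site passed the status register where the offset belonged, shifting every argument by one slot. The later hunks stop treating Winbond parts like STmicro/Micron for status-register clearing and lock/unlock. Hedged prototype for reference, with the wrong and right calls shown as comments:

#include <stdint.h>

/* parameter order per the driver: device, offset, length, status register */
static int stm_is_locked_sr_sketch(void *nor, long long ofs,
				   uint64_t len, uint8_t sr)
{
	(void)nor; (void)ofs; (void)len; (void)sr;
	return 0;   /* range/SR comparison elided */
}

/* wrong: stm_is_locked_sr(nor, status_old, ofs - erasesize, erasesize)
 * right: stm_is_locked_sr(nor, ofs - erasesize, erasesize, status_old) */
]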
diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c
index b077e43b5ba9..c4cb15a3098c 100644
--- a/drivers/mtd/ubi/debug.c
+++ b/drivers/mtd/ubi/debug.c
@@ -236,7 +236,7 @@ int ubi_debugfs_init(void)
dfs_rootdir = debugfs_create_dir("ubi", NULL);
if (IS_ERR_OR_NULL(dfs_rootdir)) {
- int err = dfs_rootdir ? -ENODEV : PTR_ERR(dfs_rootdir);
+ int err = dfs_rootdir ? PTR_ERR(dfs_rootdir) : -ENODEV;
pr_err("UBI error: cannot create \"ubi\" debugfs directory, error %d\n",
err);
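[Note: debugfs_create_dir() reports failure either as NULL or as an ERR_PTR-encoded pointer, and the old ternary mapped the two cases the wrong way round, calling PTR_ERR on NULL and returning -ENODEV for a real error pointer. The corrected mapping, with a crude stand-in for PTR_ERR:

static int err_from_dentry(void *d)
{
	/* non-NULL here means an ERR_PTR-style encoded error */
	return d ? (int)(long)d /* ~PTR_ERR(d) */ : -19 /* -ENODEV */;
}
]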
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index 1fc23e48fe8e..10cf3b549959 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -1299,7 +1299,7 @@ static int self_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum)
if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err))
goto exit;
- crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_EC_HDR_SIZE_CRC);
+ crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC);
hdr_crc = be32_to_cpu(vid_hdr->hdr_crc);
if (hdr_crc != crc) {
ubi_err(ubi, "bad VID header CRC at PEB %d, calculated %#08x, read %#08x",
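[Note: the self-check summed the VID header over UBI_EC_HDR_SIZE_CRC, the erase-counter header's length, so the computed CRC covered the wrong span of bytes. The length constant must match the structure actually being checked; a minimal self-contained model (byte-order handling of the stored CRC elided):

#include <stdint.h>
#include <stddef.h>

struct vid_hdr_sketch { char fields[60]; uint32_t hdr_crc; };
#define VID_HDR_SIZE_CRC offsetof(struct vid_hdr_sketch, hdr_crc)

/* plain reflected CRC-32, standing in for the kernel's crc32() */
static uint32_t crc32_sketch(uint32_t c, const void *buf, size_t len)
{
	const unsigned char *p = buf;

	while (len--) {
		c ^= *p++;
		for (int k = 0; k < 8; k++)
			c = (c >> 1) ^ (0xedb88320U & -(c & 1u));
	}
	return c;
}

static int vid_hdr_crc_ok(const struct vid_hdr_sketch *h)
{
	return crc32_sketch(0xffffffffU, h, VID_HDR_SIZE_CRC) == h->hdr_crc;
}
]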
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index eb4489f9082f..56065632a5b8 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -603,6 +603,7 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
return 0;
}
+static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk);
/**
* do_sync_erase - run the erase worker synchronously.
* @ubi: UBI device description object
@@ -615,20 +616,16 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
static int do_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
int vol_id, int lnum, int torture)
{
- struct ubi_work *wl_wrk;
+ struct ubi_work wl_wrk;
dbg_wl("sync erase of PEB %i", e->pnum);
- wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
- if (!wl_wrk)
- return -ENOMEM;
-
- wl_wrk->e = e;
- wl_wrk->vol_id = vol_id;
- wl_wrk->lnum = lnum;
- wl_wrk->torture = torture;
+ wl_wrk.e = e;
+ wl_wrk.vol_id = vol_id;
+ wl_wrk.lnum = lnum;
+ wl_wrk.torture = torture;
- return erase_worker(ubi, wl_wrk, 0);
+ return __erase_worker(ubi, &wl_wrk);
}
/**
@@ -1014,7 +1011,7 @@ out_unlock:
}
/**
- * erase_worker - physical eraseblock erase worker function.
+ * __erase_worker - physical eraseblock erase worker function.
* @ubi: UBI device description object
* @wl_wrk: the work object
* @shutdown: non-zero if the worker has to free memory and exit
@@ -1025,8 +1022,7 @@ out_unlock:
* needed. Returns zero in case of success and a negative error code in case of
* failure.
*/
-static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
- int shutdown)
+static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk)
{
struct ubi_wl_entry *e = wl_wrk->e;
int pnum = e->pnum;
@@ -1034,21 +1030,11 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
int lnum = wl_wrk->lnum;
int err, available_consumed = 0;
- if (shutdown) {
- dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec);
- kfree(wl_wrk);
- wl_entry_destroy(ubi, e);
- return 0;
- }
-
dbg_wl("erase PEB %d EC %d LEB %d:%d",
pnum, e->ec, wl_wrk->vol_id, wl_wrk->lnum);
err = sync_erase(ubi, e, wl_wrk->torture);
if (!err) {
- /* Fine, we've erased it successfully */
- kfree(wl_wrk);
-
spin_lock(&ubi->wl_lock);
wl_tree_add(e, &ubi->free);
ubi->free_count++;
@@ -1066,7 +1052,6 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
}
ubi_err(ubi, "failed to erase PEB %d, error %d", pnum, err);
- kfree(wl_wrk);
if (err == -EINTR || err == -ENOMEM || err == -EAGAIN ||
err == -EBUSY) {
@@ -1075,6 +1060,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
/* Re-schedule the LEB for erasure */
err1 = schedule_erase(ubi, e, vol_id, lnum, 0);
if (err1) {
+ wl_entry_destroy(ubi, e);
err = err1;
goto out_ro;
}
@@ -1150,6 +1136,25 @@ out_ro:
return err;
}
+static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
+ int shutdown)
+{
+ int ret;
+
+ if (shutdown) {
+ struct ubi_wl_entry *e = wl_wrk->e;
+
+ dbg_wl("cancel erasure of PEB %d EC %d", e->pnum, e->ec);
+ kfree(wl_wrk);
+ wl_entry_destroy(ubi, e);
+ return 0;
+ }
+
+ ret = __erase_worker(ubi, wl_wrk);
+ kfree(wl_wrk);
+ return ret;
+}
+
/**
* ubi_wl_put_peb - return a PEB to the wear-leveling sub-system.
* @ubi: UBI device description object
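[Note: the refactor above separates ownership from work: __erase_worker() never frees its argument, so do_sync_erase() can pass a stack object and lose its kmalloc/-ENOMEM path entirely, while the async erase_worker() wrapper keeps both the shutdown handling and the single kfree() of the heap object it was given. The hunk also destroys the wear-leveling entry if re-scheduling the erase fails, plugging a leak. Ownership sketch:

#include <stdlib.h>

struct erase_work_sketch { int pnum; };

static int do_erase(struct erase_work_sketch *w)     /* ~__erase_worker */
{
	return w->pnum >= 0 ? 0 : -1;                 /* erase logic elided */
}

static int erase_async(struct erase_work_sketch *w)  /* ~erase_worker */
{
	int ret = do_erase(w);

	free(w);                 /* only the async wrapper owns the heap object */
	return ret;
}

static int erase_sync(int pnum)                       /* ~do_sync_erase */
{
	struct erase_work_sketch w = { .pnum = pnum };

	return do_erase(&w);     /* stack object: no kmalloc, no ENOMEM path */
}
]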
diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c
index 57dadd52b428..1deb8ff90a89 100644
--- a/drivers/net/can/bfin_can.c
+++ b/drivers/net/can/bfin_can.c
@@ -501,8 +501,6 @@ static int bfin_can_err(struct net_device *dev, u16 isrc, u16 status)
cf->data[2] |= CAN_ERR_PROT_FORM;
else if (status & SER)
cf->data[2] |= CAN_ERR_PROT_STUFF;
- else
- cf->data[2] |= CAN_ERR_PROT_UNSPEC;
}
priv->can.state = state;
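[Note: this and the following CAN hunks all apply the same two observations: CAN_ERR_PROT_UNSPEC is defined as 0 in the uapi error header, so OR-ing it into data[2] never did anything, and data[3] carries a single protocol-error location value rather than a bitmask, so it should be assigned, not OR-ed with several location codes. Sketched with the values I believe linux/can/error.h defines:

#include <stdint.h>

#define CAN_ERR_PROT_UNSPEC   0x00  /* "unspecified": OR-ing it is a no-op */
#define CAN_ERR_PROT_LOC_ACK  0x19  /* location codes are values, not flags */

static void fill_error_frame(uint8_t data[8])
{
	data[2] |= CAN_ERR_PROT_UNSPEC;   /* dead code the series removes */
	data[3] = CAN_ERR_PROT_LOC_ACK;   /* assign one location, don't OR */
}
]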
diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 5d214d135332..f91b094288da 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -962,7 +962,6 @@ static int c_can_handle_bus_err(struct net_device *dev,
* type of the last error to occur on the CAN bus
*/
cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
- cf->data[2] |= CAN_ERR_PROT_UNSPEC;
switch (lec_type) {
case LEC_STUFF_ERROR:
@@ -975,8 +974,7 @@ static int c_can_handle_bus_err(struct net_device *dev,
break;
case LEC_ACK_ERROR:
netdev_dbg(dev, "ack error\n");
- cf->data[3] |= (CAN_ERR_PROT_LOC_ACK |
- CAN_ERR_PROT_LOC_ACK_DEL);
+ cf->data[3] = CAN_ERR_PROT_LOC_ACK;
break;
case LEC_BIT1_ERROR:
netdev_dbg(dev, "bit1 error\n");
@@ -988,8 +986,7 @@ static int c_can_handle_bus_err(struct net_device *dev,
break;
case LEC_CRC_ERROR:
netdev_dbg(dev, "CRC error\n");
- cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ |
- CAN_ERR_PROT_LOC_CRC_DEL);
+ cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
break;
default:
break;
diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c
index 70a8cbb29e75..1e37313054f3 100644
--- a/drivers/net/can/cc770/cc770.c
+++ b/drivers/net/can/cc770/cc770.c
@@ -578,7 +578,7 @@ static int cc770_err(struct net_device *dev, u8 status)
cf->data[2] |= CAN_ERR_PROT_BIT0;
break;
case STAT_LEC_CRC:
- cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ;
+ cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
break;
}
}
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 868fe945e35a..41c0fc9f3b14 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -535,13 +535,13 @@ static void do_bus_err(struct net_device *dev,
if (reg_esr & FLEXCAN_ESR_ACK_ERR) {
netdev_dbg(dev, "ACK_ERR irq\n");
cf->can_id |= CAN_ERR_ACK;
- cf->data[3] |= CAN_ERR_PROT_LOC_ACK;
+ cf->data[3] = CAN_ERR_PROT_LOC_ACK;
tx_errors = 1;
}
if (reg_esr & FLEXCAN_ESR_CRC_ERR) {
netdev_dbg(dev, "CRC_ERR irq\n");
cf->data[2] |= CAN_ERR_PROT_BIT;
- cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ;
+ cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
rx_errors = 1;
}
if (reg_esr & FLEXCAN_ESR_FRM_ERR) {
diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c
index c1e85368a198..5d04f5464faf 100644
--- a/drivers/net/can/janz-ican3.c
+++ b/drivers/net/can/janz-ican3.c
@@ -1096,7 +1096,6 @@ static int ican3_handle_cevtind(struct ican3_dev *mod, struct ican3_msg *msg)
cf->data[2] |= CAN_ERR_PROT_STUFF;
break;
default:
- cf->data[2] |= CAN_ERR_PROT_UNSPEC;
cf->data[3] = ecc & ECC_SEG;
break;
}
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index ef655177bb5e..39cf911f7a1e 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -487,7 +487,6 @@ static int m_can_handle_lec_err(struct net_device *dev,
* type of the last error to occur on the CAN bus
*/
cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
- cf->data[2] |= CAN_ERR_PROT_UNSPEC;
switch (lec_type) {
case LEC_STUFF_ERROR:
@@ -500,8 +499,7 @@ static int m_can_handle_lec_err(struct net_device *dev,
break;
case LEC_ACK_ERROR:
netdev_dbg(dev, "ack error\n");
- cf->data[3] |= (CAN_ERR_PROT_LOC_ACK |
- CAN_ERR_PROT_LOC_ACK_DEL);
+ cf->data[3] = CAN_ERR_PROT_LOC_ACK;
break;
case LEC_BIT1_ERROR:
netdev_dbg(dev, "bit1 error\n");
@@ -513,8 +511,7 @@ static int m_can_handle_lec_err(struct net_device *dev,
break;
case LEC_CRC_ERROR:
netdev_dbg(dev, "CRC error\n");
- cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ |
- CAN_ERR_PROT_LOC_CRC_DEL);
+ cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
break;
default:
break;
diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c
index e187ca783da0..c1317889d3d8 100644
--- a/drivers/net/can/pch_can.c
+++ b/drivers/net/can/pch_can.c
@@ -559,8 +559,7 @@ static void pch_can_error(struct net_device *ndev, u32 status)
stats->rx_errors++;
break;
case PCH_CRC_ERR:
- cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ |
- CAN_ERR_PROT_LOC_CRC_DEL;
+ cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
priv->can.can_stats.bus_error++;
stats->rx_errors++;
break;
diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c
index 7bd54191f962..bc46be39549d 100644
--- a/drivers/net/can/rcar_can.c
+++ b/drivers/net/can/rcar_can.c
@@ -241,17 +241,16 @@ static void rcar_can_error(struct net_device *ndev)
u8 ecsr;
netdev_dbg(priv->ndev, "Bus error interrupt:\n");
- if (skb) {
+ if (skb)
cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
- cf->data[2] = CAN_ERR_PROT_UNSPEC;
- }
+
ecsr = readb(&priv->regs->ecsr);
if (ecsr & RCAR_CAN_ECSR_ADEF) {
netdev_dbg(priv->ndev, "ACK Delimiter Error\n");
tx_errors++;
writeb(~RCAR_CAN_ECSR_ADEF, &priv->regs->ecsr);
if (skb)
- cf->data[3] |= CAN_ERR_PROT_LOC_ACK_DEL;
+ cf->data[3] = CAN_ERR_PROT_LOC_ACK_DEL;
}
if (ecsr & RCAR_CAN_ECSR_BE0F) {
netdev_dbg(priv->ndev, "Bit Error (dominant)\n");
@@ -272,7 +271,7 @@ static void rcar_can_error(struct net_device *ndev)
rx_errors++;
writeb(~RCAR_CAN_ECSR_CEF, &priv->regs->ecsr);
if (skb)
- cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ;
+ cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
}
if (ecsr & RCAR_CAN_ECSR_AEF) {
netdev_dbg(priv->ndev, "ACK Error\n");
@@ -280,7 +279,7 @@ static void rcar_can_error(struct net_device *ndev)
writeb(~RCAR_CAN_ECSR_AEF, &priv->regs->ecsr);
if (skb) {
cf->can_id |= CAN_ERR_ACK;
- cf->data[3] |= CAN_ERR_PROT_LOC_ACK;
+ cf->data[3] = CAN_ERR_PROT_LOC_ACK;
}
}
if (ecsr & RCAR_CAN_ECSR_FEF) {
diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c
index 7b92e911a616..8dda3b703d39 100644
--- a/drivers/net/can/sja1000/sja1000.c
+++ b/drivers/net/can/sja1000/sja1000.c
@@ -218,6 +218,9 @@ static void sja1000_start(struct net_device *dev)
priv->write_reg(priv, SJA1000_RXERR, 0x0);
priv->read_reg(priv, SJA1000_ECC);
+ /* clear interrupt flags */
+ priv->read_reg(priv, SJA1000_IR);
+
/* leave reset mode */
set_normal_mode(dev);
}
@@ -446,7 +449,6 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status)
cf->data[2] |= CAN_ERR_PROT_STUFF;
break;
default:
- cf->data[2] |= CAN_ERR_PROT_UNSPEC;
cf->data[3] = ecc & ECC_SEG;
break;
}
diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c
index d9a42c646783..68ef0a4cd821 100644
--- a/drivers/net/can/sun4i_can.c
+++ b/drivers/net/can/sun4i_can.c
@@ -575,7 +575,6 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status)
cf->data[2] |= CAN_ERR_PROT_STUFF;
break;
default:
- cf->data[2] |= CAN_ERR_PROT_UNSPEC;
cf->data[3] = (ecc & SUN4I_STA_ERR_SEG_CODE)
>> 16;
break;
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index cf345cbfe819..680d1ff07a55 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -722,7 +722,6 @@ static int ti_hecc_error(struct net_device *ndev, int int_status,
if (err_status & HECC_BUS_ERROR) {
++priv->can.can_stats.bus_error;
cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
- cf->data[2] |= CAN_ERR_PROT_UNSPEC;
if (err_status & HECC_CANES_FE) {
hecc_set_bit(priv, HECC_CANES, HECC_CANES_FE);
cf->data[2] |= CAN_ERR_PROT_FORM;
@@ -737,13 +736,11 @@ static int ti_hecc_error(struct net_device *ndev, int int_status,
}
if (err_status & HECC_CANES_CRCE) {
hecc_set_bit(priv, HECC_CANES, HECC_CANES_CRCE);
- cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ |
- CAN_ERR_PROT_LOC_CRC_DEL;
+ cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
}
if (err_status & HECC_CANES_ACKE) {
hecc_set_bit(priv, HECC_CANES, HECC_CANES_ACKE);
- cf->data[3] |= CAN_ERR_PROT_LOC_ACK |
- CAN_ERR_PROT_LOC_ACK_DEL;
+ cf->data[3] = CAN_ERR_PROT_LOC_ACK;
}
}
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index 2d390384ef3b..fc5b75675cd8 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -377,7 +377,6 @@ static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg)
cf->data[2] |= CAN_ERR_PROT_STUFF;
break;
default:
- cf->data[2] |= CAN_ERR_PROT_UNSPEC;
cf->data[3] = ecc & SJA1000_ECC_SEG;
break;
}
diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c
index 0e5a4493ba4f..113e64fcd73b 100644
--- a/drivers/net/can/usb/esd_usb2.c
+++ b/drivers/net/can/usb/esd_usb2.c
@@ -282,7 +282,6 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv,
cf->data[2] |= CAN_ERR_PROT_STUFF;
break;
default:
- cf->data[2] |= CAN_ERR_PROT_UNSPEC;
cf->data[3] = ecc & SJA1000_ECC_SEG;
break;
}
diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index 8b17a9065b0b..022bfa13ebfa 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -944,10 +944,9 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
if (es->leaf.error_factor & M16C_EF_ACKE)
- cf->data[3] |= (CAN_ERR_PROT_LOC_ACK);
+ cf->data[3] = CAN_ERR_PROT_LOC_ACK;
if (es->leaf.error_factor & M16C_EF_CRCE)
- cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ |
- CAN_ERR_PROT_LOC_CRC_DEL);
+ cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
if (es->leaf.error_factor & M16C_EF_FORME)
cf->data[2] |= CAN_ERR_PROT_FORM;
if (es->leaf.error_factor & M16C_EF_STFE)
diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c
index de95b1ccba3e..a731720f1d13 100644
--- a/drivers/net/can/usb/usb_8dev.c
+++ b/drivers/net/can/usb/usb_8dev.c
@@ -401,9 +401,7 @@ static void usb_8dev_rx_err_msg(struct usb_8dev_priv *priv,
tx_errors = 1;
break;
case USB_8DEV_STATUSMSG_CRC:
- cf->data[2] |= CAN_ERR_PROT_UNSPEC;
- cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ |
- CAN_ERR_PROT_LOC_CRC_DEL;
+ cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
rx_errors = 1;
break;
case USB_8DEV_STATUSMSG_BIT0:
diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index fc55e8e0351d..51670b322409 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -608,17 +608,15 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
/* Check for error interrupt */
if (isr & XCAN_IXR_ERROR_MASK) {
- if (skb) {
+ if (skb)
cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
- cf->data[2] |= CAN_ERR_PROT_UNSPEC;
- }
/* Check for Ack error interrupt */
if (err_status & XCAN_ESR_ACKER_MASK) {
stats->tx_errors++;
if (skb) {
cf->can_id |= CAN_ERR_ACK;
- cf->data[3] |= CAN_ERR_PROT_LOC_ACK;
+ cf->data[3] = CAN_ERR_PROT_LOC_ACK;
}
}
@@ -654,8 +652,7 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr)
stats->rx_errors++;
if (skb) {
cf->can_id |= CAN_ERR_PROT;
- cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ |
- CAN_ERR_PROT_LOC_CRC_DEL;
+ cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
}
}
priv->can.can_stats.bus_error++;
diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
index 9093577755f6..0527f485c3dc 100644
--- a/drivers/net/dsa/mv88e6060.c
+++ b/drivers/net/dsa/mv88e6060.c
@@ -15,9 +15,7 @@
#include <linux/netdevice.h>
#include <linux/phy.h>
#include <net/dsa.h>
-
-#define REG_PORT(p) (8 + (p))
-#define REG_GLOBAL 0x0f
+#include "mv88e6060.h"
static int reg_read(struct dsa_switch *ds, int addr, int reg)
{
@@ -67,13 +65,14 @@ static char *mv88e6060_probe(struct device *host_dev, int sw_addr)
if (bus == NULL)
return NULL;
- ret = mdiobus_read(bus, sw_addr + REG_PORT(0), 0x03);
+ ret = mdiobus_read(bus, sw_addr + REG_PORT(0), PORT_SWITCH_ID);
if (ret >= 0) {
- if (ret == 0x0600)
+ if (ret == PORT_SWITCH_ID_6060)
return "Marvell 88E6060 (A0)";
- if (ret == 0x0601 || ret == 0x0602)
+ if (ret == PORT_SWITCH_ID_6060_R1 ||
+ ret == PORT_SWITCH_ID_6060_R2)
return "Marvell 88E6060 (B0)";
- if ((ret & 0xfff0) == 0x0600)
+ if ((ret & PORT_SWITCH_ID_6060_MASK) == PORT_SWITCH_ID_6060)
return "Marvell 88E6060";
}
@@ -87,22 +86,26 @@ static int mv88e6060_switch_reset(struct dsa_switch *ds)
unsigned long timeout;
/* Set all ports to the disabled state. */
- for (i = 0; i < 6; i++) {
- ret = REG_READ(REG_PORT(i), 0x04);
- REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
+ for (i = 0; i < MV88E6060_PORTS; i++) {
+ ret = REG_READ(REG_PORT(i), PORT_CONTROL);
+ REG_WRITE(REG_PORT(i), PORT_CONTROL,
+ ret & ~PORT_CONTROL_STATE_MASK);
}
/* Wait for transmit queues to drain. */
usleep_range(2000, 4000);
/* Reset the switch. */
- REG_WRITE(REG_GLOBAL, 0x0a, 0xa130);
+ REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL,
+ GLOBAL_ATU_CONTROL_SWRESET |
+ GLOBAL_ATU_CONTROL_ATUSIZE_1024 |
+ GLOBAL_ATU_CONTROL_ATE_AGE_5MIN);
/* Wait up to one second for reset to complete. */
timeout = jiffies + 1 * HZ;
while (time_before(jiffies, timeout)) {
- ret = REG_READ(REG_GLOBAL, 0x00);
- if ((ret & 0x8000) == 0x0000)
+ ret = REG_READ(REG_GLOBAL, GLOBAL_STATUS);
+ if (ret & GLOBAL_STATUS_INIT_READY)
break;
usleep_range(1000, 2000);
@@ -119,13 +122,15 @@ static int mv88e6060_setup_global(struct dsa_switch *ds)
* set the maximum frame size to 1536 bytes, and mask all
* interrupt sources.
*/
- REG_WRITE(REG_GLOBAL, 0x04, 0x0800);
+ REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, GLOBAL_CONTROL_MAX_FRAME_1536);
/* Enable automatic address learning, set the address
* database size to 1024 entries, and set the default aging
* time to 5 minutes.
*/
- REG_WRITE(REG_GLOBAL, 0x0a, 0x2130);
+ REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL,
+ GLOBAL_ATU_CONTROL_ATUSIZE_1024 |
+ GLOBAL_ATU_CONTROL_ATE_AGE_5MIN);
return 0;
}
@@ -139,25 +144,30 @@ static int mv88e6060_setup_port(struct dsa_switch *ds, int p)
* state to Forwarding. Additionally, if this is the CPU
* port, enable Ingress and Egress Trailer tagging mode.
*/
- REG_WRITE(addr, 0x04, dsa_is_cpu_port(ds, p) ? 0x4103 : 0x0003);
+ REG_WRITE(addr, PORT_CONTROL,
+ dsa_is_cpu_port(ds, p) ?
+ PORT_CONTROL_TRAILER |
+ PORT_CONTROL_INGRESS_MODE |
+ PORT_CONTROL_STATE_FORWARDING :
+ PORT_CONTROL_STATE_FORWARDING);
/* Port based VLAN map: give each port its own address
* database, allow the CPU port to talk to each of the 'real'
* ports, and allow each of the 'real' ports to only talk to
* the CPU port.
*/
- REG_WRITE(addr, 0x06,
- ((p & 0xf) << 12) |
- (dsa_is_cpu_port(ds, p) ?
- ds->phys_port_mask :
- (1 << ds->dst->cpu_port)));
+ REG_WRITE(addr, PORT_VLAN_MAP,
+ ((p & 0xf) << PORT_VLAN_MAP_DBNUM_SHIFT) |
+ (dsa_is_cpu_port(ds, p) ?
+ ds->phys_port_mask :
+ BIT(ds->dst->cpu_port)));
/* Port Association Vector: when learning source addresses
* of packets, add the address to the address database using
* a port bitmap that has only the bit for this port set and
* the other bits clear.
*/
- REG_WRITE(addr, 0x0b, 1 << p);
+ REG_WRITE(addr, PORT_ASSOC_VECTOR, BIT(p));
return 0;
}
@@ -177,7 +187,7 @@ static int mv88e6060_setup(struct dsa_switch *ds)
if (ret < 0)
return ret;
- for (i = 0; i < 6; i++) {
+ for (i = 0; i < MV88E6060_PORTS; i++) {
ret = mv88e6060_setup_port(ds, i);
if (ret < 0)
return ret;
@@ -188,16 +198,17 @@ static int mv88e6060_setup(struct dsa_switch *ds)
static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr)
{
- REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]);
- REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]);
- REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]);
+ /* Use the same MAC Address as FD Pause frames for all ports */
+ REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, (addr[0] << 9) | addr[1]);
+ REG_WRITE(REG_GLOBAL, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]);
+ REG_WRITE(REG_GLOBAL, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]);
return 0;
}
static int mv88e6060_port_to_phy_addr(int port)
{
- if (port >= 0 && port <= 5)
+ if (port >= 0 && port < MV88E6060_PORTS)
return port;
return -1;
}
@@ -225,54 +236,6 @@ mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val)
return reg_write(ds, addr, regnum, val);
}
-static void mv88e6060_poll_link(struct dsa_switch *ds)
-{
- int i;
-
- for (i = 0; i < DSA_MAX_PORTS; i++) {
- struct net_device *dev;
- int uninitialized_var(port_status);
- int link;
- int speed;
- int duplex;
- int fc;
-
- dev = ds->ports[i];
- if (dev == NULL)
- continue;
-
- link = 0;
- if (dev->flags & IFF_UP) {
- port_status = reg_read(ds, REG_PORT(i), 0x00);
- if (port_status < 0)
- continue;
-
- link = !!(port_status & 0x1000);
- }
-
- if (!link) {
- if (netif_carrier_ok(dev)) {
- netdev_info(dev, "link down\n");
- netif_carrier_off(dev);
- }
- continue;
- }
-
- speed = (port_status & 0x0100) ? 100 : 10;
- duplex = (port_status & 0x0200) ? 1 : 0;
- fc = ((port_status & 0xc000) == 0xc000) ? 1 : 0;
-
- if (!netif_carrier_ok(dev)) {
- netdev_info(dev,
- "link up, %d Mb/s, %s duplex, flow control %sabled\n",
- speed,
- duplex ? "full" : "half",
- fc ? "en" : "dis");
- netif_carrier_on(dev);
- }
- }
-}
-
static struct dsa_switch_driver mv88e6060_switch_driver = {
.tag_protocol = DSA_TAG_PROTO_TRAILER,
.probe = mv88e6060_probe,
@@ -280,7 +243,6 @@ static struct dsa_switch_driver mv88e6060_switch_driver = {
.set_addr = mv88e6060_set_addr,
.phy_read = mv88e6060_phy_read,
.phy_write = mv88e6060_phy_write,
- .poll_link = mv88e6060_poll_link,
};
static int __init mv88e6060_init(void)
diff --git a/drivers/net/dsa/mv88e6060.h b/drivers/net/dsa/mv88e6060.h
new file mode 100644
index 000000000000..cc9b2ed4aff4
--- /dev/null
+++ b/drivers/net/dsa/mv88e6060.h
@@ -0,0 +1,111 @@
+/*
+ * drivers/net/dsa/mv88e6060.h - Marvell 88e6060 switch chip support
+ * Copyright (c) 2015 Neil Armstrong
+ *
+ * Based on mv88e6xxx.h
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __MV88E6060_H
+#define __MV88E6060_H
+
+#define MV88E6060_PORTS 6
+
+#define REG_PORT(p) (0x8 + (p))
+#define PORT_STATUS 0x00
+#define PORT_STATUS_PAUSE_EN BIT(15)
+#define PORT_STATUS_MY_PAUSE BIT(14)
+#define PORT_STATUS_FC (PORT_STATUS_MY_PAUSE | PORT_STATUS_PAUSE_EN)
+#define PORT_STATUS_RESOLVED BIT(13)
+#define PORT_STATUS_LINK BIT(12)
+#define PORT_STATUS_PORTMODE BIT(11)
+#define PORT_STATUS_PHYMODE BIT(10)
+#define PORT_STATUS_DUPLEX BIT(9)
+#define PORT_STATUS_SPEED BIT(8)
+#define PORT_SWITCH_ID 0x03
+#define PORT_SWITCH_ID_6060 0x0600
+#define PORT_SWITCH_ID_6060_MASK 0xfff0
+#define PORT_SWITCH_ID_6060_R1 0x0601
+#define PORT_SWITCH_ID_6060_R2 0x0602
+#define PORT_CONTROL 0x04
+#define PORT_CONTROL_FORCE_FLOW_CTRL BIT(15)
+#define PORT_CONTROL_TRAILER BIT(14)
+#define PORT_CONTROL_HEADER BIT(11)
+#define PORT_CONTROL_INGRESS_MODE BIT(8)
+#define PORT_CONTROL_VLAN_TUNNEL BIT(7)
+#define PORT_CONTROL_STATE_MASK 0x03
+#define PORT_CONTROL_STATE_DISABLED 0x00
+#define PORT_CONTROL_STATE_BLOCKING 0x01
+#define PORT_CONTROL_STATE_LEARNING 0x02
+#define PORT_CONTROL_STATE_FORWARDING 0x03
+#define PORT_VLAN_MAP 0x06
+#define PORT_VLAN_MAP_DBNUM_SHIFT 12
+#define PORT_VLAN_MAP_TABLE_MASK 0x1f
+#define PORT_ASSOC_VECTOR 0x0b
+#define PORT_ASSOC_VECTOR_MONITOR BIT(15)
+#define PORT_ASSOC_VECTOR_PAV_MASK 0x1f
+#define PORT_RX_CNTR 0x10
+#define PORT_TX_CNTR 0x11
+
+#define REG_GLOBAL 0x0f
+#define GLOBAL_STATUS 0x00
+#define GLOBAL_STATUS_SW_MODE_MASK (0x3 << 12)
+#define GLOBAL_STATUS_SW_MODE_0 (0x0 << 12)
+#define GLOBAL_STATUS_SW_MODE_1 (0x1 << 12)
+#define GLOBAL_STATUS_SW_MODE_2 (0x2 << 12)
+#define GLOBAL_STATUS_SW_MODE_3 (0x3 << 12)
+#define GLOBAL_STATUS_INIT_READY BIT(11)
+#define GLOBAL_STATUS_ATU_FULL BIT(3)
+#define GLOBAL_STATUS_ATU_DONE BIT(2)
+#define GLOBAL_STATUS_PHY_INT BIT(1)
+#define GLOBAL_STATUS_EEINT BIT(0)
+#define GLOBAL_MAC_01 0x01
+#define GLOBAL_MAC_01_DIFF_ADDR BIT(8)
+#define GLOBAL_MAC_23 0x02
+#define GLOBAL_MAC_45 0x03
+#define GLOBAL_CONTROL 0x04
+#define GLOBAL_CONTROL_DISCARD_EXCESS BIT(13)
+#define GLOBAL_CONTROL_MAX_FRAME_1536 BIT(10)
+#define GLOBAL_CONTROL_RELOAD_EEPROM BIT(9)
+#define GLOBAL_CONTROL_CTRMODE BIT(8)
+#define GLOBAL_CONTROL_ATU_FULL_EN BIT(3)
+#define GLOBAL_CONTROL_ATU_DONE_EN BIT(2)
+#define GLOBAL_CONTROL_PHYINT_EN BIT(1)
+#define GLOBAL_CONTROL_EEPROM_DONE_EN BIT(0)
+#define GLOBAL_ATU_CONTROL 0x0a
+#define GLOBAL_ATU_CONTROL_SWRESET BIT(15)
+#define GLOBAL_ATU_CONTROL_LEARNDIS BIT(14)
+#define GLOBAL_ATU_CONTROL_ATUSIZE_256 (0x0 << 12)
+#define GLOBAL_ATU_CONTROL_ATUSIZE_512 (0x1 << 12)
+#define GLOBAL_ATU_CONTROL_ATUSIZE_1024 (0x2 << 12)
+#define GLOBAL_ATU_CONTROL_ATE_AGE_SHIFT 4
+#define GLOBAL_ATU_CONTROL_ATE_AGE_MASK (0xff << 4)
+#define GLOBAL_ATU_CONTROL_ATE_AGE_5MIN (0x13 << 4)
+#define GLOBAL_ATU_OP 0x0b
+#define GLOBAL_ATU_OP_BUSY BIT(15)
+#define GLOBAL_ATU_OP_NOP (0 << 12)
+#define GLOBAL_ATU_OP_FLUSH_ALL ((1 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_OP_FLUSH_UNLOCKED ((2 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_OP_LOAD_DB ((3 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_OP_GET_NEXT_DB ((4 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_OP_FLUSH_DB ((5 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_OP_FLUSH_UNLOCKED_DB ((6 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_DATA 0x0c
+#define GLOBAL_ATU_DATA_PORT_VECTOR_MASK 0x3f0
+#define GLOBAL_ATU_DATA_PORT_VECTOR_SHIFT 4
+#define GLOBAL_ATU_DATA_STATE_MASK 0x0f
+#define GLOBAL_ATU_DATA_STATE_UNUSED 0x00
+#define GLOBAL_ATU_DATA_STATE_UC_STATIC 0x0e
+#define GLOBAL_ATU_DATA_STATE_UC_LOCKED 0x0f
+#define GLOBAL_ATU_DATA_STATE_MC_STATIC 0x07
+#define GLOBAL_ATU_DATA_STATE_MC_LOCKED 0x0e
+#define GLOBAL_ATU_MAC_01 0x0d
+#define GLOBAL_ATU_MAC_23 0x0e
+#define GLOBAL_ATU_MAC_45 0x0f
+
+#endif
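[Note: the new header trades the driver's magic numbers for named register fields, so the old "ret & 0xfffc" in the reset path now reads as clearing the port-state field; the reset wait also appears to change polarity, from waiting for bit 15 to clear to waiting for GLOBAL_STATUS_INIT_READY (bit 11) to be set. One line of the translation, for flavour:

#include <stdint.h>

#define PORT_CONTROL_STATE_MASK 0x03

static uint16_t port_state_disabled(uint16_t port_control)
{
	/* old: ctrl & 0xfffc, the same operation, now self-describing */
	return port_control & (uint16_t)~PORT_CONTROL_STATE_MASK;
}
]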
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 05aa7597dab9..31c5e476fd64 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -29,6 +29,7 @@ source "drivers/net/ethernet/apm/Kconfig"
source "drivers/net/ethernet/apple/Kconfig"
source "drivers/net/ethernet/arc/Kconfig"
source "drivers/net/ethernet/atheros/Kconfig"
+source "drivers/net/ethernet/aurora/Kconfig"
source "drivers/net/ethernet/cadence/Kconfig"
source "drivers/net/ethernet/adi/Kconfig"
source "drivers/net/ethernet/broadcom/Kconfig"
@@ -78,7 +79,6 @@ source "drivers/net/ethernet/ibm/Kconfig"
source "drivers/net/ethernet/intel/Kconfig"
source "drivers/net/ethernet/i825xx/Kconfig"
source "drivers/net/ethernet/xscale/Kconfig"
-source "drivers/net/ethernet/icplus/Kconfig"
config JME
tristate "JMicron(R) PCI-Express Gigabit Ethernet support"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index ddfc808110a1..071f84eb6f3f 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_NET_XGENE) += apm/
obj-$(CONFIG_NET_VENDOR_APPLE) += apple/
obj-$(CONFIG_NET_VENDOR_ARC) += arc/
obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/
+obj-$(CONFIG_NET_VENDOR_AURORA) += aurora/
obj-$(CONFIG_NET_CADENCE) += cadence/
obj-$(CONFIG_NET_BFIN) += adi/
obj-$(CONFIG_NET_VENDOR_BROADCOM) += broadcom/
@@ -41,7 +42,6 @@ obj-$(CONFIG_NET_VENDOR_IBM) += ibm/
obj-$(CONFIG_NET_VENDOR_INTEL) += intel/
obj-$(CONFIG_NET_VENDOR_I825XX) += i825xx/
obj-$(CONFIG_NET_VENDOR_XSCALE) += xscale/
-obj-$(CONFIG_IP1000) += icplus/
obj-$(CONFIG_JME) += jme.o
obj-$(CONFIG_KORINA) += korina.o
obj-$(CONFIG_LANTIQ_ETOP) += lantiq_etop.o
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index e2afabf3a465..7ccebae9cb48 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -1500,10 +1500,11 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent)
return -ENODEV;
}
- if (!pci_set_dma_mask(pdev, PCNET32_DMA_MASK)) {
+ err = pci_set_dma_mask(pdev, PCNET32_DMA_MASK);
+ if (err) {
if (pcnet32_debug & NETIF_MSG_PROBE)
pr_err("architecture does not support 32bit PCI busmaster DMA\n");
- return -ENODEV;
+ return err;
}
if (!request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci")) {
if (pcnet32_debug & NETIF_MSG_PROBE)
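[Note: pci_set_dma_mask() follows the 0-on-success kernel convention, so the old "if (!pci_set_dma_mask(...))" failed the probe precisely when the 32-bit mask was accepted. The fix also propagates the real error code instead of a blanket -ENODEV. Shape of the corrected check:

static int set_mask_checked(int (*set_dma_mask)(void))
{
	int err = set_dma_mask();   /* 0 on success, negative on rejection */

	if (err)                    /* non-zero: the mask was rejected */
		return err;
	return 0;
}
]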
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index 970781a9e677..f6a7161e3b85 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -1849,7 +1849,7 @@ static int xgbe_exit(struct xgbe_prv_data *pdata)
usleep_range(10, 15);
/* Poll Until Poll Condition */
- while (count-- && XGMAC_IOREAD_BITS(pdata, DMA_MR, SWR))
+ while (--count && XGMAC_IOREAD_BITS(pdata, DMA_MR, SWR))
usleep_range(500, 600);
if (!count)
@@ -1873,7 +1873,7 @@ static int xgbe_flush_tx_queues(struct xgbe_prv_data *pdata)
/* Poll Until Poll Condition */
for (i = 0; i < pdata->tx_q_count; i++) {
count = 2000;
- while (count-- && XGMAC_MTL_IOREAD_BITS(pdata, i,
+ while (--count && XGMAC_MTL_IOREAD_BITS(pdata, i,
MTL_Q_TQOMR, FTQ))
usleep_range(500, 600);
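[Note: with post-decrement the counter ends at -1 when the hardware never reports done, so the driver's "if (!count)" timeout check after the loop can never fire; pre-decrement exits with count == 0 exactly on exhaustion and non-zero on success. Compact model of the corrected loop:

static int poll_until_clear(int (*still_busy)(void), int count)
{
	while (--count && still_busy())
		;                   /* usleep_range(500, 600) in the driver */

	return count ? 0 : -16;     /* -EBUSY exactly when polls ran out */
}
]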
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 991412ce6f48..d0ae1a6cc212 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -289,6 +289,7 @@ static int xgene_enet_setup_tx_desc(struct xgene_enet_desc_ring *tx_ring,
struct sk_buff *skb)
{
struct device *dev = ndev_to_dev(tx_ring->ndev);
+ struct xgene_enet_pdata *pdata = netdev_priv(tx_ring->ndev);
struct xgene_enet_raw_desc *raw_desc;
__le64 *exp_desc = NULL, *exp_bufs = NULL;
dma_addr_t dma_addr, pbuf_addr, *frag_dma_addr;
@@ -419,6 +420,7 @@ out:
raw_desc->m0 = cpu_to_le64(SET_VAL(LL, ll) | SET_VAL(NV, nv) |
SET_VAL(USERINFO, tx_ring->tail));
tx_ring->cp_ring->cp_skb[tx_ring->tail] = skb;
+ pdata->tx_level += count;
tx_ring->tail = tail;
return count;
@@ -429,14 +431,13 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb,
{
struct xgene_enet_pdata *pdata = netdev_priv(ndev);
struct xgene_enet_desc_ring *tx_ring = pdata->tx_ring;
- struct xgene_enet_desc_ring *cp_ring = tx_ring->cp_ring;
- u32 tx_level, cq_level;
+ u32 tx_level = pdata->tx_level;
int count;
- tx_level = pdata->ring_ops->len(tx_ring);
- cq_level = pdata->ring_ops->len(cp_ring);
- if (unlikely(tx_level > pdata->tx_qcnt_hi ||
- cq_level > pdata->cp_qcnt_hi)) {
+ if (tx_level < pdata->txc_level)
+ tx_level += ((typeof(pdata->tx_level))~0U);
+
+ if ((tx_level - pdata->txc_level) > pdata->tx_qcnt_hi) {
netif_stop_queue(ndev);
return NETDEV_TX_BUSY;
}
@@ -450,12 +451,12 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb,
return NETDEV_TX_OK;
}
- pdata->ring_ops->wr_cmd(tx_ring, count);
skb_tx_timestamp(skb);
pdata->stats.tx_packets++;
pdata->stats.tx_bytes += skb->len;
+ pdata->ring_ops->wr_cmd(tx_ring, count);
return NETDEV_TX_OK;
}
@@ -539,10 +540,13 @@ static int xgene_enet_process_ring(struct xgene_enet_desc_ring *ring,
struct xgene_enet_raw_desc *raw_desc, *exp_desc;
u16 head = ring->head;
u16 slots = ring->slots - 1;
- int ret, count = 0, processed = 0;
+ int ret, desc_count, count = 0, processed = 0;
+ bool is_completion;
do {
raw_desc = &ring->raw_desc[head];
+ desc_count = 0;
+ is_completion = false;
exp_desc = NULL;
if (unlikely(xgene_enet_is_desc_slot_empty(raw_desc)))
break;
@@ -559,18 +563,24 @@ static int xgene_enet_process_ring(struct xgene_enet_desc_ring *ring,
}
dma_rmb();
count++;
+ desc_count++;
}
- if (is_rx_desc(raw_desc))
+ if (is_rx_desc(raw_desc)) {
ret = xgene_enet_rx_frame(ring, raw_desc);
- else
+ } else {
ret = xgene_enet_tx_completion(ring, raw_desc);
+ is_completion = true;
+ }
xgene_enet_mark_desc_slot_empty(raw_desc);
if (exp_desc)
xgene_enet_mark_desc_slot_empty(exp_desc);
head = (head + 1) & slots;
count++;
+ desc_count++;
processed++;
+ if (is_completion)
+ pdata->txc_level += desc_count;
if (ret)
break;
@@ -580,10 +590,8 @@ static int xgene_enet_process_ring(struct xgene_enet_desc_ring *ring,
pdata->ring_ops->wr_cmd(ring, -count);
ring->head = head;
- if (netif_queue_stopped(ring->ndev)) {
- if (pdata->ring_ops->len(ring) < pdata->cp_qcnt_low)
- netif_wake_queue(ring->ndev);
- }
+ if (netif_queue_stopped(ring->ndev))
+ netif_start_queue(ring->ndev);
}
return processed;
@@ -688,10 +696,10 @@ static int xgene_enet_open(struct net_device *ndev)
mac_ops->tx_enable(pdata);
mac_ops->rx_enable(pdata);
+ xgene_enet_napi_enable(pdata);
ret = xgene_enet_register_irq(ndev);
if (ret)
return ret;
- xgene_enet_napi_enable(pdata);
if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII)
phy_start(pdata->phy_dev);
@@ -715,13 +723,13 @@ static int xgene_enet_close(struct net_device *ndev)
else
cancel_delayed_work_sync(&pdata->link_work);
- xgene_enet_napi_disable(pdata);
- xgene_enet_free_irq(ndev);
- xgene_enet_process_ring(pdata->rx_ring, -1);
-
mac_ops->tx_disable(pdata);
mac_ops->rx_disable(pdata);
+ xgene_enet_free_irq(ndev);
+ xgene_enet_napi_disable(pdata);
+ xgene_enet_process_ring(pdata->rx_ring, -1);
+
return 0;
}
@@ -1033,9 +1041,7 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev)
pdata->tx_ring->cp_ring = cp_ring;
pdata->tx_ring->dst_ring_num = xgene_enet_dst_ring_num(cp_ring);
- pdata->tx_qcnt_hi = pdata->tx_ring->slots / 2;
- pdata->cp_qcnt_hi = pdata->rx_ring->slots / 2;
- pdata->cp_qcnt_low = pdata->cp_qcnt_hi / 2;
+ pdata->tx_qcnt_hi = pdata->tx_ring->slots - 128;
return 0;
@@ -1474,15 +1480,15 @@ static int xgene_enet_probe(struct platform_device *pdev)
}
ndev->hw_features = ndev->features;
- ret = register_netdev(ndev);
+ ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64));
if (ret) {
- netdev_err(ndev, "Failed to register netdev\n");
+ netdev_err(ndev, "No usable DMA configuration\n");
goto err;
}
- ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64));
+ ret = register_netdev(ndev);
if (ret) {
- netdev_err(ndev, "No usable DMA configuration\n");
+ netdev_err(ndev, "Failed to register netdev\n");
goto err;
}
@@ -1490,14 +1496,17 @@ static int xgene_enet_probe(struct platform_device *pdev)
if (ret)
goto err;
- xgene_enet_napi_add(pdata);
mac_ops = pdata->mac_ops;
- if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII)
+ if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) {
ret = xgene_enet_mdio_config(pdata);
- else
+ if (ret)
+ goto err;
+ } else {
INIT_DELAYED_WORK(&pdata->link_work, mac_ops->link_state);
+ }
- return ret;
+ xgene_enet_napi_add(pdata);
+ return 0;
err:
unregister_netdev(ndev);
free_netdev(ndev);
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
index a6e56b88c0a0..1aa72c787f8d 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
@@ -155,11 +155,11 @@ struct xgene_enet_pdata {
enum xgene_enet_id enet_id;
struct xgene_enet_desc_ring *tx_ring;
struct xgene_enet_desc_ring *rx_ring;
+ u16 tx_level;
+ u16 txc_level;
char *dev_name;
u32 rx_buff_cnt;
u32 tx_qcnt_hi;
- u32 cp_qcnt_hi;
- u32 cp_qcnt_low;
u32 rx_irq;
u32 txc_irq;
u8 cq_cnt;
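[Note: instead of asking the ring engine for its fill level on every transmit, the driver now keeps two free-running u16 counters, tx_level (descriptors queued) and txc_level (descriptors completed); the in-flight count is their difference, with a wraparound correction mirroring the (typeof(pdata->tx_level))~0U adjustment in the xmit hunk:

#include <stdint.h>

static uint32_t tx_in_flight(uint16_t tx_level, uint16_t txc_level)
{
	uint32_t level = tx_level;

	if (level < txc_level)
		level += UINT16_MAX;   /* undo the u16 wrap, as in the patch */

	return level - txc_level;
}
]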
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index c8af3ce3ea38..bd377a6b067d 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -1534,6 +1534,8 @@ static const struct pci_device_id alx_pci_tbl[] = {
.driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
{ PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2200),
.driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
+ { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2400),
+ .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
{ PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8162),
.driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
{ PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8171) },
diff --git a/drivers/net/ethernet/atheros/alx/reg.h b/drivers/net/ethernet/atheros/alx/reg.h
index af006b44b2a6..0959e6824cb6 100644
--- a/drivers/net/ethernet/atheros/alx/reg.h
+++ b/drivers/net/ethernet/atheros/alx/reg.h
@@ -37,6 +37,7 @@
#define ALX_DEV_ID_AR8161 0x1091
#define ALX_DEV_ID_E2200 0xe091
+#define ALX_DEV_ID_E2400 0xe0a1
#define ALX_DEV_ID_AR8162 0x1090
#define ALX_DEV_ID_AR8171 0x10A1
#define ALX_DEV_ID_AR8172 0x10A0
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 2795d6db10e1..8b5988e210d5 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -1016,13 +1016,12 @@ static int atl1c_setup_ring_resources(struct atl1c_adapter *adapter)
sizeof(struct atl1c_recv_ret_status) * rx_desc_count +
8 * 4;
- ring_header->desc = pci_alloc_consistent(pdev, ring_header->size,
- &ring_header->dma);
+ ring_header->desc = dma_zalloc_coherent(&pdev->dev, ring_header->size,
+ &ring_header->dma, GFP_KERNEL);
if (unlikely(!ring_header->desc)) {
- dev_err(&pdev->dev, "pci_alloc_consistend failed\n");
+ dev_err(&pdev->dev, "could not get memory for DMA buffer\n");
goto err_nomem;
}
- memset(ring_header->desc, 0, ring_header->size);
/* init TPD ring */
tpd_ring[0].dma = roundup(ring_header->dma, 8);
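[Note: dma_zalloc_coherent() is dma_alloc_coherent() plus zeroing, which lets the follow-up memset (and, incidentally, the old misspelled error string) go away. Behavioural equivalent in plain C, with malloc standing in for the coherent allocator, which additionally takes the device, a DMA-handle out-pointer and gfp flags:

#include <stdlib.h>
#include <string.h>

static void *zalloc_coherent_sketch(size_t size)
{
	void *p = malloc(size);      /* dma_alloc_coherent() analogue */

	if (p)
		memset(p, 0, size);  /* the part dma_zalloc_coherent folds in */
	return p;
}
]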
diff --git a/drivers/net/ethernet/aurora/Kconfig b/drivers/net/ethernet/aurora/Kconfig
new file mode 100644
index 000000000000..8ba7f8ff3434
--- /dev/null
+++ b/drivers/net/ethernet/aurora/Kconfig
@@ -0,0 +1,21 @@
+config NET_VENDOR_AURORA
+ bool "Aurora VLSI devices"
+ help
+ If you have a network (Ethernet) device belonging to this class,
+ say Y.
+
+ Note that the answer to this question doesn't directly affect the
+ kernel: saying N will just cause the configurator to skip all
+ questions about Aurora devices. If you say Y, you will be asked
+ for your specific device in the following questions.
+
+if NET_VENDOR_AURORA
+
+config AURORA_NB8800
+ tristate "Aurora AU-NB8800 support"
+ depends on HAS_DMA
+ select PHYLIB
+ help
+ Support for the AU-NB8800 gigabit Ethernet controller.
+
+endif
diff --git a/drivers/net/ethernet/aurora/Makefile b/drivers/net/ethernet/aurora/Makefile
new file mode 100644
index 000000000000..6cb528a2fc26
--- /dev/null
+++ b/drivers/net/ethernet/aurora/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_AURORA_NB8800) += nb8800.o
diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
new file mode 100644
index 000000000000..ecc4a334c507
--- /dev/null
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -0,0 +1,1552 @@
+/*
+ * Copyright (C) 2015 Mans Rullgard <mans@mansr.com>
+ *
+ * Mostly rewritten, based on driver from Sigma Designs. Original
+ * copyright notice below.
+ *
+ *
+ * Driver for tangox SMP864x/SMP865x/SMP867x/SMP868x builtin Ethernet Mac.
+ *
+ * Copyright (C) 2005 Maxime Bizon <mbizon@freebox.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/of_device.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/dma-mapping.h>
+#include <linux/phy.h>
+#include <linux/cache.h>
+#include <linux/jiffies.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <asm/barrier.h>
+
+#include "nb8800.h"
+
+static void nb8800_tx_done(struct net_device *dev);
+static int nb8800_dma_stop(struct net_device *dev);
+
+static inline u8 nb8800_readb(struct nb8800_priv *priv, int reg)
+{
+ return readb_relaxed(priv->base + reg);
+}
+
+static inline u32 nb8800_readl(struct nb8800_priv *priv, int reg)
+{
+ return readl_relaxed(priv->base + reg);
+}
+
+static inline void nb8800_writeb(struct nb8800_priv *priv, int reg, u8 val)
+{
+ writeb_relaxed(val, priv->base + reg);
+}
+
+static inline void nb8800_writew(struct nb8800_priv *priv, int reg, u16 val)
+{
+ writew_relaxed(val, priv->base + reg);
+}
+
+static inline void nb8800_writel(struct nb8800_priv *priv, int reg, u32 val)
+{
+ writel_relaxed(val, priv->base + reg);
+}
+
+static inline void nb8800_maskb(struct nb8800_priv *priv, int reg,
+ u32 mask, u32 val)
+{
+ u32 old = nb8800_readb(priv, reg);
+ u32 new = (old & ~mask) | (val & mask);
+
+ if (new != old)
+ nb8800_writeb(priv, reg, new);
+}
+
+static inline void nb8800_maskl(struct nb8800_priv *priv, int reg,
+ u32 mask, u32 val)
+{
+ u32 old = nb8800_readl(priv, reg);
+ u32 new = (old & ~mask) | (val & mask);
+
+ if (new != old)
+ nb8800_writel(priv, reg, new);
+}
+
+static inline void nb8800_modb(struct nb8800_priv *priv, int reg, u8 bits,
+ bool set)
+{
+ nb8800_maskb(priv, reg, bits, set ? bits : 0);
+}
+
+static inline void nb8800_setb(struct nb8800_priv *priv, int reg, u8 bits)
+{
+ nb8800_maskb(priv, reg, bits, bits);
+}
+
+static inline void nb8800_clearb(struct nb8800_priv *priv, int reg, u8 bits)
+{
+ nb8800_maskb(priv, reg, bits, 0);
+}
+
+static inline void nb8800_modl(struct nb8800_priv *priv, int reg, u32 bits,
+ bool set)
+{
+ nb8800_maskl(priv, reg, bits, set ? bits : 0);
+}
+
+static inline void nb8800_setl(struct nb8800_priv *priv, int reg, u32 bits)
+{
+ nb8800_maskl(priv, reg, bits, bits);
+}
+
+static inline void nb8800_clearl(struct nb8800_priv *priv, int reg, u32 bits)
+{
+ nb8800_maskl(priv, reg, bits, 0);
+}
+
+static int nb8800_mdio_wait(struct mii_bus *bus)
+{
+ struct nb8800_priv *priv = bus->priv;
+ u32 val;
+
+ return readl_poll_timeout_atomic(priv->base + NB8800_MDIO_CMD,
+ val, !(val & MDIO_CMD_GO), 1, 1000);
+}
+
+static int nb8800_mdio_cmd(struct mii_bus *bus, u32 cmd)
+{
+ struct nb8800_priv *priv = bus->priv;
+ int err;
+
+ err = nb8800_mdio_wait(bus);
+ if (err)
+ return err;
+
+ nb8800_writel(priv, NB8800_MDIO_CMD, cmd);
+ udelay(10);
+ nb8800_writel(priv, NB8800_MDIO_CMD, cmd | MDIO_CMD_GO);
+
+ return nb8800_mdio_wait(bus);
+}
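[Note: the MDIO engine in this new driver is kicked by writing the command word twice, first without and then with MDIO_CMD_GO, and waiting for hardware to clear GO again; nb8800_mdio_wait() does that with readl_poll_timeout_atomic on a 1 us step and 1 ms budget. A bounded-poll model of the wait, with read_cmd_reg standing in for nb8800_readl(priv, NB8800_MDIO_CMD):

#include <stdint.h>

static int mdio_wait_sketch(uint32_t (*read_cmd_reg)(void),
			    uint32_t go_bit, int max_polls)
{
	while (max_polls--) {
		if (!(read_cmd_reg() & go_bit))
			return 0;          /* engine idle: command finished */
		/* udelay(1) between polls in the real helper */
	}
	return -110;                       /* -ETIMEDOUT */
}
]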
+
+static int nb8800_mdio_read(struct mii_bus *bus, int phy_id, int reg)
+{
+ struct nb8800_priv *priv = bus->priv;
+ u32 val;
+ int err;
+
+ err = nb8800_mdio_cmd(bus, MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg));
+ if (err)
+ return err;
+
+ val = nb8800_readl(priv, NB8800_MDIO_STS);
+ if (val & MDIO_STS_ERR)
+ return 0xffff;
+
+ return val & 0xffff;
+}
+
+static int nb8800_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val)
+{
+ u32 cmd = MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg) |
+ MDIO_CMD_DATA(val) | MDIO_CMD_WR;
+
+ return nb8800_mdio_cmd(bus, cmd);
+}
+
+static void nb8800_mac_tx(struct net_device *dev, bool enable)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+
+ while (nb8800_readl(priv, NB8800_TXC_CR) & TCR_EN)
+ cpu_relax();
+
+ nb8800_modb(priv, NB8800_TX_CTL1, TX_EN, enable);
+}
+
+static void nb8800_mac_rx(struct net_device *dev, bool enable)
+{
+ nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_EN, enable);
+}
+
+static void nb8800_mac_af(struct net_device *dev, bool enable)
+{
+ nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_AF_EN, enable);
+}
+
+static void nb8800_start_rx(struct net_device *dev)
+{
+ nb8800_setl(netdev_priv(dev), NB8800_RXC_CR, RCR_EN);
+}
+
+static int nb8800_alloc_rx(struct net_device *dev, unsigned int i, bool napi)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ struct nb8800_rx_desc *rxd = &priv->rx_descs[i];
+ struct nb8800_rx_buf *rxb = &priv->rx_bufs[i];
+ int size = L1_CACHE_ALIGN(RX_BUF_SIZE);
+ dma_addr_t dma_addr;
+ struct page *page;
+ unsigned long offset;
+ void *data;
+
+ data = napi ? napi_alloc_frag(size) : netdev_alloc_frag(size);
+ if (!data)
+ return -ENOMEM;
+
+ page = virt_to_head_page(data);
+ offset = data - page_address(page);
+
+ dma_addr = dma_map_page(&dev->dev, page, offset, RX_BUF_SIZE,
+ DMA_FROM_DEVICE);
+
+ if (dma_mapping_error(&dev->dev, dma_addr)) {
+ skb_free_frag(data);
+ return -ENOMEM;
+ }
+
+ rxb->page = page;
+ rxb->offset = offset;
+ rxd->desc.s_addr = dma_addr;
+
+ return 0;
+}
+
+static void nb8800_receive(struct net_device *dev, unsigned int i,
+ unsigned int len)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ struct nb8800_rx_desc *rxd = &priv->rx_descs[i];
+ struct page *page = priv->rx_bufs[i].page;
+ int offset = priv->rx_bufs[i].offset;
+ void *data = page_address(page) + offset;
+ dma_addr_t dma = rxd->desc.s_addr;
+ struct sk_buff *skb;
+ unsigned int size;
+ int err;
+
+ size = len <= RX_COPYBREAK ? len : RX_COPYHDR;
+
+ skb = napi_alloc_skb(&priv->napi, size);
+ if (!skb) {
+ netdev_err(dev, "rx skb allocation failed\n");
+ dev->stats.rx_dropped++;
+ return;
+ }
+
+ if (len <= RX_COPYBREAK) {
+ dma_sync_single_for_cpu(&dev->dev, dma, len, DMA_FROM_DEVICE);
+ memcpy(skb_put(skb, len), data, len);
+ dma_sync_single_for_device(&dev->dev, dma, len,
+ DMA_FROM_DEVICE);
+ } else {
+ err = nb8800_alloc_rx(dev, i, true);
+ if (err) {
+ netdev_err(dev, "rx buffer allocation failed\n");
+ dev->stats.rx_dropped++;
+ return;
+ }
+
+ dma_unmap_page(&dev->dev, dma, RX_BUF_SIZE, DMA_FROM_DEVICE);
+ memcpy(skb_put(skb, RX_COPYHDR), data, RX_COPYHDR);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+ offset + RX_COPYHDR, len - RX_COPYHDR,
+ RX_BUF_SIZE);
+ }
+
+ skb->protocol = eth_type_trans(skb, dev);
+ napi_gro_receive(&priv->napi, skb);
+}
+
+static void nb8800_rx_error(struct net_device *dev, u32 report)
+{
+ if (report & RX_LENGTH_ERR)
+ dev->stats.rx_length_errors++;
+
+ if (report & RX_FCS_ERR)
+ dev->stats.rx_crc_errors++;
+
+ if (report & RX_FIFO_OVERRUN)
+ dev->stats.rx_fifo_errors++;
+
+ if (report & RX_ALIGNMENT_ERROR)
+ dev->stats.rx_frame_errors++;
+
+ dev->stats.rx_errors++;
+}
+
+static int nb8800_poll(struct napi_struct *napi, int budget)
+{
+ struct net_device *dev = napi->dev;
+ struct nb8800_priv *priv = netdev_priv(dev);
+ struct nb8800_rx_desc *rxd;
+ unsigned int last = priv->rx_eoc;
+ unsigned int next;
+ int work = 0;
+
+ nb8800_tx_done(dev);
+
+again:
+ while (work < budget) {
+ struct nb8800_rx_buf *rxb;
+ unsigned int len;
+
+ next = (last + 1) % RX_DESC_COUNT;
+
+ rxb = &priv->rx_bufs[next];
+ rxd = &priv->rx_descs[next];
+
+ if (!rxd->report)
+ break;
+
+ len = RX_BYTES_TRANSFERRED(rxd->report);
+
+ if (IS_RX_ERROR(rxd->report))
+ nb8800_rx_error(dev, rxd->report);
+ else
+ nb8800_receive(dev, next, len);
+
+ dev->stats.rx_packets++;
+ dev->stats.rx_bytes += len;
+
+ if (rxd->report & RX_MULTICAST_PKT)
+ dev->stats.multicast++;
+
+ rxd->report = 0;
+ last = next;
+ work++;
+ }
+
+ if (work) {
+ priv->rx_descs[last].desc.config |= DESC_EOC;
+ wmb(); /* ensure new EOC is written before clearing old */
+ priv->rx_descs[priv->rx_eoc].desc.config &= ~DESC_EOC;
+ priv->rx_eoc = last;
+ nb8800_start_rx(dev);
+ }
+
+ if (work < budget) {
+ nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq);
+
+ /* If a packet arrived after we last checked but
+ * before writing RX_ITR, the interrupt will be
+ * delayed, so we retrieve it now.
+ */
+ if (priv->rx_descs[next].report)
+ goto again;
+
+ napi_complete_done(napi, work);
+ }
+
+ return work;
+}
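[Note: the RX ring is terminated by whichever descriptor carries DESC_EOC. When nb8800_poll() hands consumed slots back, it first marks the new tail as EOC and only then, after the wmb(), clears the old terminator, so the DMA engine never observes a ring without one; the re-check of rxd->report after re-arming RX_ITR closes the window where a frame landed between the last scan and the throttle write. The EOC handoff in isolation, with a placeholder bit for DESC_EOC:

#include <stdint.h>

#define DESC_EOC_SKETCH (1u << 31)   /* placeholder for DESC_EOC */

static void move_eoc(uint32_t *config, unsigned int old_eoc,
		     unsigned int new_eoc)
{
	config[new_eoc] |= DESC_EOC_SKETCH;  /* publish the new terminator */
	/* wmb() in the driver orders the two stores */
	config[old_eoc] &= ~DESC_EOC_SKETCH; /* then reopen the old slot */
}
]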
+
+static void __nb8800_tx_dma_start(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ struct nb8800_tx_buf *txb;
+ u32 txc_cr;
+
+ txb = &priv->tx_bufs[priv->tx_queue];
+ if (!txb->ready)
+ return;
+
+ txc_cr = nb8800_readl(priv, NB8800_TXC_CR);
+ if (txc_cr & TCR_EN)
+ return;
+
+ nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc);
+ wmb(); /* ensure desc addr is written before starting DMA */
+ nb8800_writel(priv, NB8800_TXC_CR, txc_cr | TCR_EN);
+
+ priv->tx_queue = (priv->tx_queue + txb->chain_len) % TX_DESC_COUNT;
+}
+
+static void nb8800_tx_dma_start(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+
+ spin_lock_irq(&priv->tx_lock);
+ __nb8800_tx_dma_start(dev);
+ spin_unlock_irq(&priv->tx_lock);
+}
+
+static void nb8800_tx_dma_start_irq(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+
+ spin_lock(&priv->tx_lock);
+ __nb8800_tx_dma_start(dev);
+ spin_unlock(&priv->tx_lock);
+}
+
+static int nb8800_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ struct nb8800_tx_desc *txd;
+ struct nb8800_tx_buf *txb;
+ struct nb8800_dma_desc *desc;
+ dma_addr_t dma_addr;
+ unsigned int dma_len;
+ unsigned int align;
+ unsigned int next;
+
+ if (atomic_read(&priv->tx_free) <= NB8800_DESC_LOW) {
+ netif_stop_queue(dev);
+ return NETDEV_TX_BUSY;
+ }
+
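+ /* The DMA engine wants 8-byte aligned source addresses, so up to
+ * 7 unaligned head bytes are copied into the descriptor-embedded
+ * bounce buffer and sent through an extra leading descriptor below.
+ */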
+ align = (8 - (uintptr_t)skb->data) & 7;
+
+ dma_len = skb->len - align;
+ dma_addr = dma_map_single(&dev->dev, skb->data + align,
+ dma_len, DMA_TO_DEVICE);
+
+ if (dma_mapping_error(&dev->dev, dma_addr)) {
+ netdev_err(dev, "tx dma mapping error\n");
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+ }
+
+ if (atomic_dec_return(&priv->tx_free) <= NB8800_DESC_LOW) {
+ netif_stop_queue(dev);
+ skb->xmit_more = 0;
+ }
+
+ next = priv->tx_next;
+ txb = &priv->tx_bufs[next];
+ txd = &priv->tx_descs[next];
+ desc = &txd->desc[0];
+
+ next = (next + 1) % TX_DESC_COUNT;
+
+ if (align) {
+ memcpy(txd->buf, skb->data, align);
+
+ desc->s_addr =
+ txb->dma_desc + offsetof(struct nb8800_tx_desc, buf);
+ desc->n_addr = txb->dma_desc + sizeof(txd->desc[0]);
+ desc->config = DESC_BTS(2) | DESC_DS | align;
+
+ desc++;
+ }
+
+ desc->s_addr = dma_addr;
+ desc->n_addr = priv->tx_bufs[next].dma_desc;
+ desc->config = DESC_BTS(2) | DESC_DS | DESC_EOF | dma_len;
+
+ if (!skb->xmit_more)
+ desc->config |= DESC_EOC;
+
+ txb->skb = skb;
+ txb->dma_addr = dma_addr;
+ txb->dma_len = dma_len;
+
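+ /* With xmit_more set, packets are gathered into a chain headed by
+ * tx_chain; the chain is only marked ready and handed to the DMA
+ * engine once the last skb of the burst arrives below.
+ */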
+ if (!priv->tx_chain) {
+ txb->chain_len = 1;
+ priv->tx_chain = txb;
+ } else {
+ priv->tx_chain->chain_len++;
+ }
+
+ netdev_sent_queue(dev, skb->len);
+
+ priv->tx_next = next;
+
+ if (!skb->xmit_more) {
+ smp_wmb();
+ priv->tx_chain->ready = true;
+ priv->tx_chain = NULL;
+ nb8800_tx_dma_start(dev);
+ }
+
+ return NETDEV_TX_OK;
+}
+
+static void nb8800_tx_error(struct net_device *dev, u32 report)
+{
+ if (report & TX_LATE_COLLISION)
+ dev->stats.collisions++;
+
+ if (report & TX_PACKET_DROPPED)
+ dev->stats.tx_dropped++;
+
+ if (report & TX_FIFO_UNDERRUN)
+ dev->stats.tx_fifo_errors++;
+
+ dev->stats.tx_errors++;
+}
+
+static void nb8800_tx_done(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ unsigned int limit = priv->tx_next;
+ unsigned int done = priv->tx_done;
+ unsigned int packets = 0;
+ unsigned int len = 0;
+
+ while (done != limit) {
+ struct nb8800_tx_desc *txd = &priv->tx_descs[done];
+ struct nb8800_tx_buf *txb = &priv->tx_bufs[done];
+ struct sk_buff *skb;
+
+ if (!txd->report)
+ break;
+
+ skb = txb->skb;
+ len += skb->len;
+
+ dma_unmap_single(&dev->dev, txb->dma_addr, txb->dma_len,
+ DMA_TO_DEVICE);
+
+ if (IS_TX_ERROR(txd->report)) {
+ nb8800_tx_error(dev, txd->report);
+ kfree_skb(skb);
+ } else {
+ consume_skb(skb);
+ }
+
+ dev->stats.tx_packets++;
+ dev->stats.tx_bytes += TX_BYTES_TRANSFERRED(txd->report);
+ dev->stats.collisions += TX_EARLY_COLLISIONS(txd->report);
+
+ txb->skb = NULL;
+ txb->ready = false;
+ txd->report = 0;
+
+ done = (done + 1) % TX_DESC_COUNT;
+ packets++;
+ }
+
+ if (packets) {
+ smp_mb__before_atomic();
+ atomic_add(packets, &priv->tx_free);
+ netdev_completed_queue(dev, packets, len);
+ netif_wake_queue(dev);
+ priv->tx_done = done;
+ }
+}
+
+static irqreturn_t nb8800_irq(int irq, void *dev_id)
+{
+ struct net_device *dev = dev_id;
+ struct nb8800_priv *priv = netdev_priv(dev);
+ irqreturn_t ret = IRQ_NONE;
+ u32 val;
+
+ /* tx interrupt */
+ val = nb8800_readl(priv, NB8800_TXC_SR);
+ if (val) {
+ nb8800_writel(priv, NB8800_TXC_SR, val);
+
+ if (val & TSR_DI)
+ nb8800_tx_dma_start_irq(dev);
+
+ if (val & TSR_TI)
+ napi_schedule_irqoff(&priv->napi);
+
+ if (unlikely(val & TSR_DE))
+ netdev_err(dev, "TX DMA error\n");
+
+ /* should never happen with automatic status retrieval */
+ if (unlikely(val & TSR_TO))
+ netdev_err(dev, "TX Status FIFO overflow\n");
+
+ ret = IRQ_HANDLED;
+ }
+
+ /* rx interrupt */
+ val = nb8800_readl(priv, NB8800_RXC_SR);
+ if (val) {
+ nb8800_writel(priv, NB8800_RXC_SR, val);
+
+ if (likely(val & (RSR_RI | RSR_DI))) {
+ nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_poll);
+ napi_schedule_irqoff(&priv->napi);
+ }
+
+ if (unlikely(val & RSR_DE))
+ netdev_err(dev, "RX DMA error\n");
+
+ /* should never happen with automatic status retrieval */
+ if (unlikely(val & RSR_RO))
+ netdev_err(dev, "RX Status FIFO overflow\n");
+
+ ret = IRQ_HANDLED;
+ }
+
+ return ret;
+}
+
+static void nb8800_mac_config(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ bool gigabit = priv->speed == SPEED_1000;
+ u32 mac_mode_mask = RGMII_MODE | HALF_DUPLEX | GMAC_MODE;
+ u32 mac_mode = 0;
+ u32 slot_time;
+ u32 phy_clk;
+ u32 ict;
+
+ if (!priv->duplex)
+ mac_mode |= HALF_DUPLEX;
+
+ if (gigabit) {
+ if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII)
+ mac_mode |= RGMII_MODE;
+
+ mac_mode |= GMAC_MODE;
+ phy_clk = 125000000;
+
+ /* Should be 512 but register is only 8 bits */
+ slot_time = 255;
+ } else {
+ phy_clk = 25000000;
+ slot_time = 128;
+ }
+
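+ /* IC_THRESHOLD appears to hold the number of PHY reference clock
+ * cycles (125 MHz for gigabit, 25 MHz otherwise) per core clock
+ * cycle, rounded up.
+ */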
+ ict = DIV_ROUND_UP(phy_clk, clk_get_rate(priv->clk));
+
+ nb8800_writeb(priv, NB8800_IC_THRESHOLD, ict);
+ nb8800_writeb(priv, NB8800_SLOT_TIME, slot_time);
+ nb8800_maskb(priv, NB8800_MAC_MODE, mac_mode_mask, mac_mode);
+}
+
+static void nb8800_pause_config(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ struct phy_device *phydev = priv->phydev;
+ u32 rxcr;
+
+ if (priv->pause_aneg) {
+ if (!phydev || !phydev->link)
+ return;
+
+ priv->pause_rx = phydev->pause;
+ priv->pause_tx = phydev->pause ^ phydev->asym_pause;
+ }
+
+ nb8800_modb(priv, NB8800_RX_CTL, RX_PAUSE_EN, priv->pause_rx);
+
+ rxcr = nb8800_readl(priv, NB8800_RXC_CR);
+ if (!!(rxcr & RCR_FL) == priv->pause_tx)
+ return;
+
+ if (netif_running(dev)) {
+ napi_disable(&priv->napi);
+ netif_tx_lock_bh(dev);
+ nb8800_dma_stop(dev);
+ nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx);
+ nb8800_start_rx(dev);
+ netif_tx_unlock_bh(dev);
+ napi_enable(&priv->napi);
+ } else {
+ nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx);
+ }
+}
+
+static void nb8800_link_reconfigure(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ struct phy_device *phydev = priv->phydev;
+ int change = 0;
+
+ if (phydev->link) {
+ if (phydev->speed != priv->speed) {
+ priv->speed = phydev->speed;
+ change = 1;
+ }
+
+ if (phydev->duplex != priv->duplex) {
+ priv->duplex = phydev->duplex;
+ change = 1;
+ }
+
+ if (change)
+ nb8800_mac_config(dev);
+
+ nb8800_pause_config(dev);
+ }
+
+ if (phydev->link != priv->link) {
+ priv->link = phydev->link;
+ change = 1;
+ }
+
+ if (change)
+ phy_print_status(priv->phydev);
+}
+
+static void nb8800_update_mac_addr(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ int i;
+
+ for (i = 0; i < ETH_ALEN; i++)
+ nb8800_writeb(priv, NB8800_SRC_ADDR(i), dev->dev_addr[i]);
+
+ for (i = 0; i < ETH_ALEN; i++)
+ nb8800_writeb(priv, NB8800_UC_ADDR(i), dev->dev_addr[i]);
+}
+
+static int nb8800_set_mac_address(struct net_device *dev, void *addr)
+{
+ struct sockaddr *sock = addr;
+
+ if (netif_running(dev))
+ return -EBUSY;
+
+ ether_addr_copy(dev->dev_addr, sock->sa_data);
+ nb8800_update_mac_addr(dev);
+
+ return 0;
+}
+
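+/* Write a command value to the MC_INIT register and poll until the
+ * hardware clears it to signal completion of the multicast filter
+ * operation.
+ */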
+static void nb8800_mc_init(struct net_device *dev, int val)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+
+ nb8800_writeb(priv, NB8800_MC_INIT, val);
+ readb_poll_timeout_atomic(priv->base + NB8800_MC_INIT, val, !val,
+ 1, 1000);
+}
+
+static void nb8800_set_rx_mode(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ struct netdev_hw_addr *ha;
+ int i;
+
+ if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
+ nb8800_mac_af(dev, false);
+ return;
+ }
+
+ nb8800_mac_af(dev, true);
+ nb8800_mc_init(dev, 0);
+
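+ /* Program each multicast address into the filter: write the six
+ * address bytes, then kick MC_INIT with 0xff, which presumably
+ * commits the entry (the 0 above clears the table).
+ */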
+ netdev_for_each_mc_addr(ha, dev) {
+ for (i = 0; i < ETH_ALEN; i++)
+ nb8800_writeb(priv, NB8800_MC_ADDR(i), ha->addr[i]);
+
+ nb8800_mc_init(dev, 0xff);
+ }
+}
+
+#define RX_DESC_SIZE (RX_DESC_COUNT * sizeof(struct nb8800_rx_desc))
+#define TX_DESC_SIZE (TX_DESC_COUNT * sizeof(struct nb8800_tx_desc))
+
+static void nb8800_dma_free(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ unsigned int i;
+
+ if (priv->rx_bufs) {
+ for (i = 0; i < RX_DESC_COUNT; i++)
+ if (priv->rx_bufs[i].page)
+ put_page(priv->rx_bufs[i].page);
+
+ kfree(priv->rx_bufs);
+ priv->rx_bufs = NULL;
+ }
+
+ if (priv->tx_bufs) {
+ for (i = 0; i < TX_DESC_COUNT; i++)
+ kfree_skb(priv->tx_bufs[i].skb);
+
+ kfree(priv->tx_bufs);
+ priv->tx_bufs = NULL;
+ }
+
+ if (priv->rx_descs) {
+ dma_free_coherent(dev->dev.parent, RX_DESC_SIZE, priv->rx_descs,
+ priv->rx_desc_dma);
+ priv->rx_descs = NULL;
+ }
+
+ if (priv->tx_descs) {
+ dma_free_coherent(dev->dev.parent, TX_DESC_SIZE, priv->tx_descs,
+ priv->tx_desc_dma);
+ priv->tx_descs = NULL;
+ }
+}
+
+static void nb8800_dma_reset(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ struct nb8800_rx_desc *rxd;
+ struct nb8800_tx_desc *txd;
+ unsigned int i;
+
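+ /* Chain the rx descriptors into a ring: each n_addr points to the
+ * next descriptor, and the last one wraps to the first with the
+ * EOC flag set so the DMA engine stops there until restarted.
+ */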
+ for (i = 0; i < RX_DESC_COUNT; i++) {
+ dma_addr_t rx_dma = priv->rx_desc_dma + i * sizeof(*rxd);
+
+ rxd = &priv->rx_descs[i];
+ rxd->desc.n_addr = rx_dma + sizeof(*rxd);
+ rxd->desc.r_addr =
+ rx_dma + offsetof(struct nb8800_rx_desc, report);
+ rxd->desc.config = priv->rx_dma_config;
+ rxd->report = 0;
+ }
+
+ rxd->desc.n_addr = priv->rx_desc_dma;
+ rxd->desc.config |= DESC_EOC;
+
+ priv->rx_eoc = RX_DESC_COUNT - 1;
+
+ for (i = 0; i < TX_DESC_COUNT; i++) {
+ struct nb8800_tx_buf *txb = &priv->tx_bufs[i];
+ dma_addr_t r_dma = txb->dma_desc +
+ offsetof(struct nb8800_tx_desc, report);
+
+ txd = &priv->tx_descs[i];
+ txd->desc[0].r_addr = r_dma;
+ txd->desc[1].r_addr = r_dma;
+ txd->report = 0;
+ }
+
+ priv->tx_next = 0;
+ priv->tx_queue = 0;
+ priv->tx_done = 0;
+ atomic_set(&priv->tx_free, TX_DESC_COUNT);
+
+ nb8800_writel(priv, NB8800_RX_DESC_ADDR, priv->rx_desc_dma);
+
+ wmb(); /* ensure all setup is written before starting */
+}
+
+static int nb8800_dma_init(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ unsigned int n_rx = RX_DESC_COUNT;
+ unsigned int n_tx = TX_DESC_COUNT;
+ unsigned int i;
+ int err;
+
+ priv->rx_descs = dma_alloc_coherent(dev->dev.parent, RX_DESC_SIZE,
+ &priv->rx_desc_dma, GFP_KERNEL);
+ if (!priv->rx_descs)
+ goto err_out;
+
+ priv->rx_bufs = kcalloc(n_rx, sizeof(*priv->rx_bufs), GFP_KERNEL);
+ if (!priv->rx_bufs)
+ goto err_out;
+
+ for (i = 0; i < n_rx; i++) {
+ err = nb8800_alloc_rx(dev, i, false);
+ if (err)
+ goto err_out;
+ }
+
+ priv->tx_descs = dma_alloc_coherent(dev->dev.parent, TX_DESC_SIZE,
+ &priv->tx_desc_dma, GFP_KERNEL);
+ if (!priv->tx_descs)
+ goto err_out;
+
+ priv->tx_bufs = kcalloc(n_tx, sizeof(*priv->tx_bufs), GFP_KERNEL);
+ if (!priv->tx_bufs)
+ goto err_out;
+
+ for (i = 0; i < n_tx; i++)
+ priv->tx_bufs[i].dma_desc =
+ priv->tx_desc_dma + i * sizeof(struct nb8800_tx_desc);
+
+ nb8800_dma_reset(dev);
+
+ return 0;
+
+err_out:
+ nb8800_dma_free(dev);
+
+ return -ENOMEM;
+}
+
+static int nb8800_dma_stop(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ struct nb8800_tx_buf *txb = &priv->tx_bufs[0];
+ struct nb8800_tx_desc *txd = &priv->tx_descs[0];
+ int retry = 5;
+ u32 txcr;
+ u32 rxcr;
+ int err;
+ unsigned int i;
+
+ /* wait for tx to finish */
+ err = readl_poll_timeout_atomic(priv->base + NB8800_TXC_CR, txcr,
+ !(txcr & TCR_EN) &&
+ priv->tx_done == priv->tx_next,
+ 1000, 1000000);
+ if (err)
+ return err;
+
+ /* The rx DMA only stops when it reaches the end of the chain.
+ * To make this happen, we set the EOC flag on all rx
+ * descriptors, put the device in loopback mode, and send
+ * a few dummy frames. The interrupt handler will ignore
+ * these since NAPI is disabled and no real frames are in
+ * the tx queue.
+ */
+
+ for (i = 0; i < RX_DESC_COUNT; i++)
+ priv->rx_descs[i].desc.config |= DESC_EOC;
+
+ txd->desc[0].s_addr =
+ txb->dma_desc + offsetof(struct nb8800_tx_desc, buf);
+ txd->desc[0].config = DESC_BTS(2) | DESC_DS | DESC_EOF | DESC_EOC | 8;
+ memset(txd->buf, 0, sizeof(txd->buf));
+
+ nb8800_mac_af(dev, false);
+ nb8800_setb(priv, NB8800_MAC_MODE, LOOPBACK_EN);
+
+ do {
+ nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc);
+ wmb(); /* ensure desc addr is written before starting DMA */
+ nb8800_writel(priv, NB8800_TXC_CR, txcr | TCR_EN);
+
+ err = readl_poll_timeout_atomic(priv->base + NB8800_RXC_CR,
+ rxcr, !(rxcr & RCR_EN),
+ 1000, 100000);
+ } while (err && --retry);
+
+ nb8800_mac_af(dev, true);
+ nb8800_clearb(priv, NB8800_MAC_MODE, LOOPBACK_EN);
+ nb8800_dma_reset(dev);
+
+ return retry ? 0 : -ETIMEDOUT;
+}
+
+static void nb8800_pause_adv(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ u32 adv = 0;
+
+ if (!priv->phydev)
+ return;
+
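+ /* Standard pause advertisement encoding: rx+tx -> Pause,
+ * rx only -> Pause|Asym_Pause, tx only -> Asym_Pause. The
+ * xor below implements this mapping.
+ */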
+ if (priv->pause_rx)
+ adv |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
+ if (priv->pause_tx)
+ adv ^= ADVERTISED_Asym_Pause;
+
+ priv->phydev->supported |= adv;
+ priv->phydev->advertising |= adv;
+}
+
+static int nb8800_open(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ int err;
+
+ /* clear any pending interrupts */
+ nb8800_writel(priv, NB8800_RXC_SR, 0xf);
+ nb8800_writel(priv, NB8800_TXC_SR, 0xf);
+
+ err = nb8800_dma_init(dev);
+ if (err)
+ return err;
+
+ err = request_irq(dev->irq, nb8800_irq, 0, dev_name(&dev->dev), dev);
+ if (err)
+ goto err_free_dma;
+
+ nb8800_mac_rx(dev, true);
+ nb8800_mac_tx(dev, true);
+
+ priv->phydev = of_phy_connect(dev, priv->phy_node,
+ nb8800_link_reconfigure, 0,
+ priv->phy_mode);
+ if (!priv->phydev)
+ goto err_free_irq;
+
+ nb8800_pause_adv(dev);
+
+ netdev_reset_queue(dev);
+ napi_enable(&priv->napi);
+ netif_start_queue(dev);
+
+ nb8800_start_rx(dev);
+ phy_start(priv->phydev);
+
+ return 0;
+
+err_free_irq:
+ free_irq(dev->irq, dev);
+err_free_dma:
+ nb8800_dma_free(dev);
+
+ return err;
+}
+
+static int nb8800_stop(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+
+ phy_stop(priv->phydev);
+
+ netif_stop_queue(dev);
+ napi_disable(&priv->napi);
+
+ nb8800_dma_stop(dev);
+ nb8800_mac_rx(dev, false);
+ nb8800_mac_tx(dev, false);
+
+ phy_disconnect(priv->phydev);
+ priv->phydev = NULL;
+
+ free_irq(dev->irq, dev);
+
+ nb8800_dma_free(dev);
+
+ return 0;
+}
+
+static int nb8800_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+
+ return phy_mii_ioctl(priv->phydev, rq, cmd);
+}
+
+static const struct net_device_ops nb8800_netdev_ops = {
+ .ndo_open = nb8800_open,
+ .ndo_stop = nb8800_stop,
+ .ndo_start_xmit = nb8800_xmit,
+ .ndo_set_mac_address = nb8800_set_mac_address,
+ .ndo_set_rx_mode = nb8800_set_rx_mode,
+ .ndo_do_ioctl = nb8800_ioctl,
+ .ndo_change_mtu = eth_change_mtu,
+ .ndo_validate_addr = eth_validate_addr,
+};
+
+static int nb8800_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+
+ if (!priv->phydev)
+ return -ENODEV;
+
+ return phy_ethtool_gset(priv->phydev, cmd);
+}
+
+static int nb8800_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+
+ if (!priv->phydev)
+ return -ENODEV;
+
+ return phy_ethtool_sset(priv->phydev, cmd);
+}
+
+static int nb8800_nway_reset(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+
+ if (!priv->phydev)
+ return -ENODEV;
+
+ return genphy_restart_aneg(priv->phydev);
+}
+
+static void nb8800_get_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pp)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+
+ pp->autoneg = priv->pause_aneg;
+ pp->rx_pause = priv->pause_rx;
+ pp->tx_pause = priv->pause_tx;
+}
+
+static int nb8800_set_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pp)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+
+ priv->pause_aneg = pp->autoneg;
+ priv->pause_rx = pp->rx_pause;
+ priv->pause_tx = pp->tx_pause;
+
+ nb8800_pause_adv(dev);
+
+ if (!priv->pause_aneg)
+ nb8800_pause_config(dev);
+ else if (priv->phydev)
+ phy_start_aneg(priv->phydev);
+
+ return 0;
+}
+
+static const char nb8800_stats_names[][ETH_GSTRING_LEN] = {
+ "rx_bytes_ok",
+ "rx_frames_ok",
+ "rx_undersize_frames",
+ "rx_fragment_frames",
+ "rx_64_byte_frames",
+ "rx_127_byte_frames",
+ "rx_255_byte_frames",
+ "rx_511_byte_frames",
+ "rx_1023_byte_frames",
+ "rx_max_size_frames",
+ "rx_oversize_frames",
+ "rx_bad_fcs_frames",
+ "rx_broadcast_frames",
+ "rx_multicast_frames",
+ "rx_control_frames",
+ "rx_pause_frames",
+ "rx_unsup_control_frames",
+ "rx_align_error_frames",
+ "rx_overrun_frames",
+ "rx_jabber_frames",
+ "rx_bytes",
+ "rx_frames",
+
+ "tx_bytes_ok",
+ "tx_frames_ok",
+ "tx_64_byte_frames",
+ "tx_127_byte_frames",
+ "tx_255_byte_frames",
+ "tx_511_byte_frames",
+ "tx_1023_byte_frames",
+ "tx_max_size_frames",
+ "tx_oversize_frames",
+ "tx_broadcast_frames",
+ "tx_multicast_frames",
+ "tx_control_frames",
+ "tx_pause_frames",
+ "tx_underrun_frames",
+ "tx_single_collision_frames",
+ "tx_multi_collision_frames",
+ "tx_deferred_collision_frames",
+ "tx_late_collision_frames",
+ "tx_excessive_collision_frames",
+ "tx_bytes",
+ "tx_frames",
+ "tx_collisions",
+};
+
+#define NB8800_NUM_STATS ARRAY_SIZE(nb8800_stats_names)
+
+static int nb8800_get_sset_count(struct net_device *dev, int sset)
+{
+ if (sset == ETH_SS_STATS)
+ return NB8800_NUM_STATS;
+
+ return -EOPNOTSUPP;
+}
+
+static void nb8800_get_strings(struct net_device *dev, u32 sset, u8 *buf)
+{
+ if (sset == ETH_SS_STATS)
+ memcpy(buf, &nb8800_stats_names, sizeof(nb8800_stats_names));
+}
+
+static u32 nb8800_read_stat(struct net_device *dev, int index)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+
+ nb8800_writeb(priv, NB8800_STAT_INDEX, index);
+
+ return nb8800_readl(priv, NB8800_STAT_DATA);
+}
+
+static void nb8800_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *estats, u64 *st)
+{
+ unsigned int i;
+ u32 rx, tx;
+
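+ /* Counters are read through an index/data register pair; the tx
+ * counters live at the same indices as rx with bit 7 set.
+ */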
+ for (i = 0; i < NB8800_NUM_STATS / 2; i++) {
+ rx = nb8800_read_stat(dev, i);
+ tx = nb8800_read_stat(dev, i | 0x80);
+ st[i] = rx;
+ st[i + NB8800_NUM_STATS / 2] = tx;
+ }
+}
+
+static const struct ethtool_ops nb8800_ethtool_ops = {
+ .get_settings = nb8800_get_settings,
+ .set_settings = nb8800_set_settings,
+ .nway_reset = nb8800_nway_reset,
+ .get_link = ethtool_op_get_link,
+ .get_pauseparam = nb8800_get_pauseparam,
+ .set_pauseparam = nb8800_set_pauseparam,
+ .get_sset_count = nb8800_get_sset_count,
+ .get_strings = nb8800_get_strings,
+ .get_ethtool_stats = nb8800_get_ethtool_stats,
+};
+
+static int nb8800_hw_init(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ u32 val;
+
+ val = TX_RETRY_EN | TX_PAD_EN | TX_APPEND_FCS;
+ nb8800_writeb(priv, NB8800_TX_CTL1, val);
+
+ /* Collision retry count */
+ nb8800_writeb(priv, NB8800_TX_CTL2, 5);
+
+ val = RX_PAD_STRIP | RX_AF_EN;
+ nb8800_writeb(priv, NB8800_RX_CTL, val);
+
+ /* Chosen by fair dice roll */
+ nb8800_writeb(priv, NB8800_RANDOM_SEED, 4);
+
+ /* TX cycles per deferral period */
+ nb8800_writeb(priv, NB8800_TX_SDP, 12);
+
+ /* The following three threshold values have been
+ * experimentally determined for good results.
+ */
+
+ /* RX/TX FIFO threshold for partial empty (64-bit entries) */
+ nb8800_writeb(priv, NB8800_PE_THRESHOLD, 0);
+
+ /* RX/TX FIFO threshold for partial full (64-bit entries) */
+ nb8800_writeb(priv, NB8800_PF_THRESHOLD, 255);
+
+ /* Buffer size for transmit (64-bit entries) */
+ nb8800_writeb(priv, NB8800_TX_BUFSIZE, 64);
+
+ /* Configure tx DMA */
+
+ val = nb8800_readl(priv, NB8800_TXC_CR);
+ val &= TCR_LE; /* keep endian setting */
+ val |= TCR_DM; /* DMA descriptor mode */
+ val |= TCR_RS; /* automatically store tx status */
+ val |= TCR_DIE; /* interrupt on DMA chain completion */
+ val |= TCR_TFI(7); /* interrupt after 7 frames transmitted */
+ val |= TCR_BTS(2); /* 32-byte bus transaction size */
+ nb8800_writel(priv, NB8800_TXC_CR, val);
+
+ /* TX complete interrupt after 10 ms or 7 frames (see above) */
+ val = clk_get_rate(priv->clk) / 100;
+ nb8800_writel(priv, NB8800_TX_ITR, val);
+
+ /* Configure rx DMA */
+
+ val = nb8800_readl(priv, NB8800_RXC_CR);
+ val &= RCR_LE; /* keep endian setting */
+ val |= RCR_DM; /* DMA descriptor mode */
+ val |= RCR_RS; /* automatically store rx status */
+ val |= RCR_DIE; /* interrupt at end of DMA chain */
+ val |= RCR_RFI(7); /* interrupt after 7 frames received */
+ val |= RCR_BTS(2); /* 32-byte bus transaction size */
+ nb8800_writel(priv, NB8800_RXC_CR, val);
+
+ /* The rx interrupt can fire before the DMA has completed
+ * unless a small delay is added. 50 us is hopefully enough.
+ */
+ priv->rx_itr_irq = clk_get_rate(priv->clk) / 20000;
+
+ /* In NAPI poll mode we want to disable interrupts, but the
+ * hardware does not permit this. Delay 10 ms instead.
+ */
+ priv->rx_itr_poll = clk_get_rate(priv->clk) / 100;
+
+ nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq);
+
+ priv->rx_dma_config = RX_BUF_SIZE | DESC_BTS(2) | DESC_DS | DESC_EOF;
+
+ /* Flow control settings */
+
+ /* Pause time of 0.1 ms */
+ val = 100000 / 512;
+ nb8800_writeb(priv, NB8800_PQ1, val >> 8);
+ nb8800_writeb(priv, NB8800_PQ2, val & 0xff);
+
+ /* Auto-negotiate by default */
+ priv->pause_aneg = true;
+ priv->pause_rx = true;
+ priv->pause_tx = true;
+
+ nb8800_mc_init(dev, 0);
+
+ return 0;
+}
+
+static int nb8800_tangox_init(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ u32 pad_mode = PAD_MODE_MII;
+
+ switch (priv->phy_mode) {
+ case PHY_INTERFACE_MODE_MII:
+ case PHY_INTERFACE_MODE_GMII:
+ pad_mode = PAD_MODE_MII;
+ break;
+
+ case PHY_INTERFACE_MODE_RGMII:
+ pad_mode = PAD_MODE_RGMII;
+ break;
+
+ case PHY_INTERFACE_MODE_RGMII_TXID:
+ pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
+ break;
+
+ default:
+ dev_err(dev->dev.parent, "unsupported phy mode %s\n",
+ phy_modes(priv->phy_mode));
+ return -EINVAL;
+ }
+
+ nb8800_writeb(priv, NB8800_TANGOX_PAD_MODE, pad_mode);
+
+ return 0;
+}
+
+static int nb8800_tangox_reset(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ int clk_div;
+
+ nb8800_writeb(priv, NB8800_TANGOX_RESET, 0);
+ usleep_range(1000, 10000);
+ nb8800_writeb(priv, NB8800_TANGOX_RESET, 1);
+
+ wmb(); /* ensure reset is cleared before proceeding */
+
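+ /* Derive the MDIO clock divider so MDC stays at or below
+ * MAX_MDC_CLOCK (2.5 MHz, the usual 802.3 limit); the factor of
+ * two presumably accounts for MDC half-periods.
+ */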
+ clk_div = DIV_ROUND_UP(clk_get_rate(priv->clk), 2 * MAX_MDC_CLOCK);
+ nb8800_writew(priv, NB8800_TANGOX_MDIO_CLKDIV, clk_div);
+
+ return 0;
+}
+
+static const struct nb8800_ops nb8800_tangox_ops = {
+ .init = nb8800_tangox_init,
+ .reset = nb8800_tangox_reset,
+};
+
+static int nb8800_tango4_init(struct net_device *dev)
+{
+ struct nb8800_priv *priv = netdev_priv(dev);
+ int err;
+
+ err = nb8800_tangox_init(dev);
+ if (err)
+ return err;
+
+ /* On tango4, interrupting on DMA completion for each frame works
+ * and gives better performance despite generating more rx
+ * interrupts.
+ */
+
+ /* Disable unnecessary interrupt on rx completion */
+ nb8800_clearl(priv, NB8800_RXC_CR, RCR_RFI(7));
+
+ /* Request interrupt on descriptor DMA completion */
+ priv->rx_dma_config |= DESC_ID;
+
+ return 0;
+}
+
+static const struct nb8800_ops nb8800_tango4_ops = {
+ .init = nb8800_tango4_init,
+ .reset = nb8800_tangox_reset,
+};
+
+static const struct of_device_id nb8800_dt_ids[] = {
+ {
+ .compatible = "aurora,nb8800",
+ },
+ {
+ .compatible = "sigma,smp8642-ethernet",
+ .data = &nb8800_tangox_ops,
+ },
+ {
+ .compatible = "sigma,smp8734-ethernet",
+ .data = &nb8800_tango4_ops,
+ },
+ { }
+};
+
+static int nb8800_probe(struct platform_device *pdev)
+{
+ const struct of_device_id *match;
+ const struct nb8800_ops *ops = NULL;
+ struct nb8800_priv *priv;
+ struct resource *res;
+ struct net_device *dev;
+ struct mii_bus *bus;
+ const unsigned char *mac;
+ void __iomem *base;
+ int irq;
+ int ret;
+
+ match = of_match_device(nb8800_dt_ids, &pdev->dev);
+ if (match)
+ ops = match->data;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq <= 0) {
+ dev_err(&pdev->dev, "No IRQ\n");
+ return -EINVAL;
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ dev_dbg(&pdev->dev, "AU-NB8800 Ethernet at %pa\n", &res->start);
+
+ dev = alloc_etherdev(sizeof(*priv));
+ if (!dev)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, dev);
+ SET_NETDEV_DEV(dev, &pdev->dev);
+
+ priv = netdev_priv(dev);
+ priv->base = base;
+
+ priv->phy_mode = of_get_phy_mode(pdev->dev.of_node);
+ if (priv->phy_mode < 0)
+ priv->phy_mode = PHY_INTERFACE_MODE_RGMII;
+
+ priv->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(priv->clk)) {
+ dev_err(&pdev->dev, "failed to get clock\n");
+ ret = PTR_ERR(priv->clk);
+ goto err_free_dev;
+ }
+
+ ret = clk_prepare_enable(priv->clk);
+ if (ret)
+ goto err_free_dev;
+
+ spin_lock_init(&priv->tx_lock);
+
+ if (ops && ops->reset) {
+ ret = ops->reset(dev);
+ if (ret)
+ goto err_free_dev;
+ }
+
+ bus = devm_mdiobus_alloc(&pdev->dev);
+ if (!bus) {
+ ret = -ENOMEM;
+ goto err_disable_clk;
+ }
+
+ bus->name = "nb8800-mii";
+ bus->read = nb8800_mdio_read;
+ bus->write = nb8800_mdio_write;
+ bus->parent = &pdev->dev;
+ snprintf(bus->id, MII_BUS_ID_SIZE, "%lx.nb8800-mii",
+ (unsigned long)res->start);
+ bus->priv = priv;
+
+ ret = of_mdiobus_register(bus, pdev->dev.of_node);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register MII bus\n");
+ goto err_disable_clk;
+ }
+
+ priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+ if (!priv->phy_node) {
+ dev_err(&pdev->dev, "no PHY specified\n");
+ ret = -ENODEV;
+ goto err_free_bus;
+ }
+
+ priv->mii_bus = bus;
+
+ ret = nb8800_hw_init(dev);
+ if (ret)
+ goto err_free_bus;
+
+ if (ops && ops->init) {
+ ret = ops->init(dev);
+ if (ret)
+ goto err_free_bus;
+ }
+
+ dev->netdev_ops = &nb8800_netdev_ops;
+ dev->ethtool_ops = &nb8800_ethtool_ops;
+ dev->flags |= IFF_MULTICAST;
+ dev->irq = irq;
+
+ mac = of_get_mac_address(pdev->dev.of_node);
+ if (mac)
+ ether_addr_copy(dev->dev_addr, mac);
+
+ if (!is_valid_ether_addr(dev->dev_addr))
+ eth_hw_addr_random(dev);
+
+ nb8800_update_mac_addr(dev);
+
+ netif_carrier_off(dev);
+
+ /* Add the NAPI context before registering the netdev; once
+ * register_netdev() returns, the device can be opened and
+ * napi_enable() called on it.
+ */
+ netif_napi_add(dev, &priv->napi, nb8800_poll, NAPI_POLL_WEIGHT);
+
+ ret = register_netdev(dev);
+ if (ret) {
+ netdev_err(dev, "failed to register netdev\n");
+ goto err_free_dma;
+ }
+
+ netdev_info(dev, "MAC address %pM\n", dev->dev_addr);
+
+ return 0;
+
+err_free_dma:
+ nb8800_dma_free(dev);
+err_free_bus:
+ mdiobus_unregister(bus);
+err_disable_clk:
+ clk_disable_unprepare(priv->clk);
+err_free_dev:
+ free_netdev(dev);
+
+ return ret;
+}
+
+static int nb8800_remove(struct platform_device *pdev)
+{
+ struct net_device *ndev = platform_get_drvdata(pdev);
+ struct nb8800_priv *priv = netdev_priv(ndev);
+
+ unregister_netdev(ndev);
+
+ mdiobus_unregister(priv->mii_bus);
+
+ clk_disable_unprepare(priv->clk);
+
+ nb8800_dma_free(ndev);
+ free_netdev(ndev);
+
+ return 0;
+}
+
+static struct platform_driver nb8800_driver = {
+ .driver = {
+ .name = "nb8800",
+ .of_match_table = nb8800_dt_ids,
+ },
+ .probe = nb8800_probe,
+ .remove = nb8800_remove,
+};
+
+module_platform_driver(nb8800_driver);
+
+MODULE_DESCRIPTION("Aurora AU-NB8800 Ethernet driver");
+MODULE_AUTHOR("Mans Rullgard <mans@mansr.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/aurora/nb8800.h b/drivers/net/ethernet/aurora/nb8800.h
new file mode 100644
index 000000000000..e5adbc2aac9f
--- /dev/null
+++ b/drivers/net/ethernet/aurora/nb8800.h
@@ -0,0 +1,316 @@
+#ifndef _NB8800_H_
+#define _NB8800_H_
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/phy.h>
+#include <linux/clk.h>
+#include <linux/bitops.h>
+
+#define RX_DESC_COUNT 256
+#define TX_DESC_COUNT 256
+
+#define NB8800_DESC_LOW 4
+
+#define RX_BUF_SIZE 1552
+
+#define RX_COPYBREAK 256
+#define RX_COPYHDR 128
+
+#define MAX_MDC_CLOCK 2500000
+
+/* Stargate Solutions SSN8800 core registers */
+#define NB8800_TX_CTL1 0x000
+#define TX_TPD BIT(5)
+#define TX_APPEND_FCS BIT(4)
+#define TX_PAD_EN BIT(3)
+#define TX_RETRY_EN BIT(2)
+#define TX_EN BIT(0)
+
+#define NB8800_TX_CTL2 0x001
+
+#define NB8800_RX_CTL 0x004
+#define RX_BC_DISABLE BIT(7)
+#define RX_RUNT BIT(6)
+#define RX_AF_EN BIT(5)
+#define RX_PAUSE_EN BIT(3)
+#define RX_SEND_CRC BIT(2)
+#define RX_PAD_STRIP BIT(1)
+#define RX_EN BIT(0)
+
+#define NB8800_RANDOM_SEED 0x008
+#define NB8800_TX_SDP 0x14
+#define NB8800_TX_TPDP1 0x18
+#define NB8800_TX_TPDP2 0x19
+#define NB8800_SLOT_TIME 0x1c
+
+#define NB8800_MDIO_CMD 0x020
+#define MDIO_CMD_GO BIT(31)
+#define MDIO_CMD_WR BIT(26)
+#define MDIO_CMD_ADDR(x) ((x) << 21)
+#define MDIO_CMD_REG(x) ((x) << 16)
+#define MDIO_CMD_DATA(x) ((x) << 0)
+
+#define NB8800_MDIO_STS 0x024
+#define MDIO_STS_ERR BIT(31)
+
+#define NB8800_MC_ADDR(i) (0x028 + (i))
+#define NB8800_MC_INIT 0x02e
+#define NB8800_UC_ADDR(i) (0x03c + (i))
+
+#define NB8800_MAC_MODE 0x044
+#define RGMII_MODE BIT(7)
+#define HALF_DUPLEX BIT(4)
+#define BURST_EN BIT(3)
+#define LOOPBACK_EN BIT(2)
+#define GMAC_MODE BIT(0)
+
+#define NB8800_IC_THRESHOLD 0x050
+#define NB8800_PE_THRESHOLD 0x051
+#define NB8800_PF_THRESHOLD 0x052
+#define NB8800_TX_BUFSIZE 0x054
+#define NB8800_FIFO_CTL 0x056
+#define NB8800_PQ1 0x060
+#define NB8800_PQ2 0x061
+#define NB8800_SRC_ADDR(i) (0x06a + (i))
+#define NB8800_STAT_DATA 0x078
+#define NB8800_STAT_INDEX 0x07c
+#define NB8800_STAT_CLEAR 0x07d
+
+#define NB8800_SLEEP_MODE 0x07e
+#define SLEEP_MODE BIT(0)
+
+#define NB8800_WAKEUP 0x07f
+#define WAKEUP BIT(0)
+
+/* Aurora NB8800 host interface registers */
+#define NB8800_TXC_CR 0x100
+#define TCR_LK BIT(12)
+#define TCR_DS BIT(11)
+#define TCR_BTS(x) (((x) & 0x7) << 8)
+#define TCR_DIE BIT(7)
+#define TCR_TFI(x) (((x) & 0x7) << 4)
+#define TCR_LE BIT(3)
+#define TCR_RS BIT(2)
+#define TCR_DM BIT(1)
+#define TCR_EN BIT(0)
+
+#define NB8800_TXC_SR 0x104
+#define TSR_DE BIT(3)
+#define TSR_DI BIT(2)
+#define TSR_TO BIT(1)
+#define TSR_TI BIT(0)
+
+#define NB8800_TX_SAR 0x108
+#define NB8800_TX_DESC_ADDR 0x10c
+
+#define NB8800_TX_REPORT_ADDR 0x110
+#define TX_BYTES_TRANSFERRED(x) (((x) >> 16) & 0xffff)
+#define TX_FIRST_DEFERRAL BIT(7)
+#define TX_EARLY_COLLISIONS(x) (((x) >> 3) & 0xf)
+#define TX_LATE_COLLISION BIT(2)
+#define TX_PACKET_DROPPED BIT(1)
+#define TX_FIFO_UNDERRUN BIT(0)
+#define IS_TX_ERROR(r) ((r) & 0x07)
+
+#define NB8800_TX_FIFO_SR 0x114
+#define NB8800_TX_ITR 0x118
+
+#define NB8800_RXC_CR 0x200
+#define RCR_FL BIT(13)
+#define RCR_LK BIT(12)
+#define RCR_DS BIT(11)
+#define RCR_BTS(x) (((x) & 7) << 8)
+#define RCR_DIE BIT(7)
+#define RCR_RFI(x) (((x) & 7) << 4)
+#define RCR_LE BIT(3)
+#define RCR_RS BIT(2)
+#define RCR_DM BIT(1)
+#define RCR_EN BIT(0)
+
+#define NB8800_RXC_SR 0x204
+#define RSR_DE BIT(3)
+#define RSR_DI BIT(2)
+#define RSR_RO BIT(1)
+#define RSR_RI BIT(0)
+
+#define NB8800_RX_SAR 0x208
+#define NB8800_RX_DESC_ADDR 0x20c
+
+#define NB8800_RX_REPORT_ADDR 0x210
+#define RX_BYTES_TRANSFERRED(x) (((x) >> 16) & 0xffff)
+#define RX_MULTICAST_PKT BIT(9)
+#define RX_BROADCAST_PKT BIT(8)
+#define RX_LENGTH_ERR BIT(7)
+#define RX_FCS_ERR BIT(6)
+#define RX_RUNT_PKT BIT(5)
+#define RX_FIFO_OVERRUN BIT(4)
+#define RX_LATE_COLLISION BIT(3)
+#define RX_ALIGNMENT_ERROR BIT(2)
+#define RX_ERROR_MASK 0xfc
+#define IS_RX_ERROR(r) ((r) & RX_ERROR_MASK)
+
+#define NB8800_RX_FIFO_SR 0x214
+#define NB8800_RX_ITR 0x218
+
+/* Sigma Designs SMP86xx additional registers */
+#define NB8800_TANGOX_PAD_MODE 0x400
+#define PAD_MODE_MASK 0x7
+#define PAD_MODE_MII 0x0
+#define PAD_MODE_RGMII 0x1
+#define PAD_MODE_GTX_CLK_INV BIT(3)
+#define PAD_MODE_GTX_CLK_DELAY BIT(4)
+
+#define NB8800_TANGOX_MDIO_CLKDIV 0x420
+#define NB8800_TANGOX_RESET 0x424
+
+/* Hardware DMA descriptor */
+struct nb8800_dma_desc {
+ u32 s_addr; /* start address */
+ u32 n_addr; /* next descriptor address */
+ u32 r_addr; /* report address */
+ u32 config;
+} __aligned(8);
+
+#define DESC_ID BIT(23)
+#define DESC_EOC BIT(22)
+#define DESC_EOF BIT(21)
+#define DESC_LK BIT(20)
+#define DESC_DS BIT(19)
+#define DESC_BTS(x) (((x) & 0x7) << 16)
+
+/* DMA descriptor and associated data for rx.
+ * Allocated from coherent memory.
+ */
+struct nb8800_rx_desc {
+ /* DMA descriptor */
+ struct nb8800_dma_desc desc;
+
+ /* Status report filled in by hardware */
+ u32 report;
+};
+
+/* Address of buffer on rx ring */
+struct nb8800_rx_buf {
+ struct page *page;
+ unsigned long offset;
+};
+
+/* DMA descriptors and associated data for tx.
+ * Allocated from coherent memory.
+ */
+struct nb8800_tx_desc {
+ /* DMA descriptor. The second descriptor is used if packet
+ * data is unaligned.
+ */
+ struct nb8800_dma_desc desc[2];
+
+ /* Status report filled in by hardware */
+ u32 report;
+
+ /* Bounce buffer for initial unaligned part of packet */
+ u8 buf[8] __aligned(8);
+};
+
+/* Packet in tx queue */
+struct nb8800_tx_buf {
+ /* Currently queued skb */
+ struct sk_buff *skb;
+
+ /* DMA address of the first descriptor */
+ dma_addr_t dma_desc;
+
+ /* DMA address of packet data */
+ dma_addr_t dma_addr;
+
+ /* Length of DMA mapping, less than skb->len if alignment
+ * buffer is used.
+ */
+ unsigned int dma_len;
+
+ /* Number of packets in chain starting here */
+ unsigned int chain_len;
+
+ /* Packet chain ready to be submitted to hardware */
+ bool ready;
+};
+
+struct nb8800_priv {
+ struct napi_struct napi;
+
+ void __iomem *base;
+
+ /* RX DMA descriptors */
+ struct nb8800_rx_desc *rx_descs;
+
+ /* RX buffers referenced by DMA descriptors */
+ struct nb8800_rx_buf *rx_bufs;
+
+ /* Current end of chain */
+ u32 rx_eoc;
+
+ /* Value for rx interrupt time register in NAPI interrupt mode */
+ u32 rx_itr_irq;
+
+ /* Value for rx interrupt time register in NAPI poll mode */
+ u32 rx_itr_poll;
+
+ /* Value for config field of rx DMA descriptors */
+ u32 rx_dma_config;
+
+ /* TX DMA descriptors */
+ struct nb8800_tx_desc *tx_descs;
+
+ /* TX packet queue */
+ struct nb8800_tx_buf *tx_bufs;
+
+ /* Number of free tx queue entries */
+ atomic_t tx_free;
+
+ /* First free tx queue entry */
+ u32 tx_next;
+
+ /* Next buffer to transmit */
+ u32 tx_queue;
+
+ /* Start of current packet chain */
+ struct nb8800_tx_buf *tx_chain;
+
+ /* Next buffer to reclaim */
+ u32 tx_done;
+
+ /* Lock for DMA activation */
+ spinlock_t tx_lock;
+
+ struct mii_bus *mii_bus;
+ struct device_node *phy_node;
+ struct phy_device *phydev;
+
+ /* PHY connection type from DT */
+ int phy_mode;
+
+ /* Current link status */
+ int speed;
+ int duplex;
+ int link;
+
+ /* Pause settings */
+ bool pause_aneg;
+ bool pause_rx;
+ bool pause_tx;
+
+ /* DMA base address of rx descriptors, see rx_descs above */
+ dma_addr_t rx_desc_dma;
+
+ /* DMA base address of tx descriptors, see tx_descs above */
+ dma_addr_t tx_desc_dma;
+
+ struct clk *clk;
+};
+
+struct nb8800_ops {
+ int (*init)(struct net_device *dev);
+ int (*reset)(struct net_device *dev);
+};
+
+#endif /* _NB8800_H_ */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index f8d7a2f06950..c82ab87fcbe8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3430,25 +3430,29 @@ static u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb)
return rc;
}
-#if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3)
+/* VXLAN: 4 = 1 (for linear data BD) + 3 (2 PBDs + last BD) */
+#define BNX2X_NUM_VXLAN_TSO_WIN_SUB_BDS 4
+
+/* Regular: 3 = 1 (for linear data BD) + 2 (for PBD and last BD) */
+#define BNX2X_NUM_TSO_WIN_SUB_BDS 3
+
+#if (MAX_SKB_FRAGS >= MAX_FETCH_BD - BDS_PER_TX_PKT)
/* check if packet requires linearization (packet is too fragmented)
no need to check fragmentation if page size > 8K (there will be no
violation to FW restrictions) */
static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb,
u32 xmit_type)
{
- int to_copy = 0;
- int hlen = 0;
- int first_bd_sz = 0;
+ int first_bd_sz = 0, num_tso_win_sub = BNX2X_NUM_TSO_WIN_SUB_BDS;
+ int to_copy = 0, hlen = 0;
- /* 3 = 1 (for linear data BD) + 2 (for PBD and last BD) */
- if (skb_shinfo(skb)->nr_frags >= (MAX_FETCH_BD - 3)) {
+ if (xmit_type & XMIT_GSO_ENC)
+ num_tso_win_sub = BNX2X_NUM_VXLAN_TSO_WIN_SUB_BDS;
+ if (skb_shinfo(skb)->nr_frags >= (MAX_FETCH_BD - num_tso_win_sub)) {
if (xmit_type & XMIT_GSO) {
unsigned short lso_mss = skb_shinfo(skb)->gso_size;
- /* Check if LSO packet needs to be copied:
- 3 = 1 (for headers BD) + 2 (for PBD and last BD) */
- int wnd_size = MAX_FETCH_BD - 3;
+ int wnd_size = MAX_FETCH_BD - num_tso_win_sub;
/* Number of windows to check */
int num_wnds = skb_shinfo(skb)->nr_frags - wnd_size;
int wnd_idx = 0;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index f1d62d5dbaff..2e611dc5f162 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -10139,8 +10139,8 @@ static void __bnx2x_del_vxlan_port(struct bnx2x *bp, u16 port)
DP(BNX2X_MSG_SP, "Invalid vxlan port\n");
return;
}
- bp->vxlan_dst_port--;
- if (bp->vxlan_dst_port)
+ bp->vxlan_dst_port_count--;
+ if (bp->vxlan_dst_port_count)
return;
if (netif_running(bp->dev)) {
@@ -13207,7 +13207,7 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev,
/* VF with OLD Hypervisor or old PF do not support filtering */
if (IS_PF(bp)) {
- if (CHIP_IS_E1x(bp))
+ if (chip_is_e1x)
bp->accept_any_vlan = true;
else
dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index db15c5ee09c5..07f5f239cb65 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2693,17 +2693,16 @@ static int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp)
req.ver_upd = DRV_VER_UPD;
if (BNXT_PF(bp)) {
- unsigned long vf_req_snif_bmap[4];
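+ /* Bitmap of HWRM command IDs (presumably those the PF must
+ * intercept from VFs); 256 bits matches the 8 32-bit
+ * vf_req_fwd words copied below.
+ */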
+ DECLARE_BITMAP(vf_req_snif_bmap, 256);
u32 *data = (u32 *)vf_req_snif_bmap;
- memset(vf_req_snif_bmap, 0, 32);
+ memset(vf_req_snif_bmap, 0, sizeof(vf_req_snif_bmap));
for (i = 0; i < ARRAY_SIZE(bnxt_vf_req_snif); i++)
__set_bit(bnxt_vf_req_snif[i], vf_req_snif_bmap);
- for (i = 0; i < 8; i++) {
- req.vf_req_fwd[i] = cpu_to_le32(*data);
- data++;
- }
+ for (i = 0; i < 8; i++)
+ req.vf_req_fwd[i] = cpu_to_le32(data[i]);
+
req.enables |=
cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_VF_REQ_FWD);
}
@@ -3625,6 +3624,7 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp)
pf->fw_fid = le16_to_cpu(resp->fid);
pf->port_id = le16_to_cpu(resp->port_id);
memcpy(pf->mac_addr, resp->perm_mac_address, ETH_ALEN);
+ memcpy(bp->dev->dev_addr, pf->mac_addr, ETH_ALEN);
pf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx);
pf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings);
pf->max_tx_rings = le16_to_cpu(resp->max_tx_rings);
@@ -3648,8 +3648,11 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp)
vf->fw_fid = le16_to_cpu(resp->fid);
memcpy(vf->mac_addr, resp->perm_mac_address, ETH_ALEN);
- if (!is_valid_ether_addr(vf->mac_addr))
- random_ether_addr(vf->mac_addr);
+ if (is_valid_ether_addr(vf->mac_addr))
+ /* overwrite netdev dev_addr with admin VF MAC */
+ memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN);
+ else
+ random_ether_addr(bp->dev->dev_addr);
vf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx);
vf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings);
@@ -3880,6 +3883,8 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp)
#endif
}
+static int bnxt_cfg_rx_mode(struct bnxt *);
+
static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
{
int rc = 0;
@@ -3946,11 +3951,9 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
bp->vnic_info[0].rx_mask |=
CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
- rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
- if (rc) {
- netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n", rc);
+ rc = bnxt_cfg_rx_mode(bp);
+ if (rc)
goto err_out;
- }
rc = bnxt_hwrm_set_coal(bp);
if (rc)
@@ -4599,7 +4602,7 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
bp->nge_port_cnt = 1;
}
- bp->state = BNXT_STATE_OPEN;
+ set_bit(BNXT_STATE_OPEN, &bp->state);
bnxt_enable_int(bp);
/* Enable TX queues */
bnxt_tx_enable(bp);
@@ -4675,8 +4678,10 @@ int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
/* Change device state to avoid TX queue wake up's */
bnxt_tx_disable(bp);
- bp->state = BNXT_STATE_CLOSED;
- cancel_work_sync(&bp->sp_task);
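+ /* Clear OPEN before waiting: bnxt_sp_task() sets IN_SP_TASK and
+ * then tests OPEN, so with the paired memory barriers one side
+ * always observes the other and the wait below cannot be missed.
+ */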
+ clear_bit(BNXT_STATE_OPEN, &bp->state);
+ smp_mb__after_atomic();
+ while (test_bit(BNXT_STATE_IN_SP_TASK, &bp->state))
+ msleep(20);
/* Flush rings before disabling interrupts */
bnxt_shutdown_nic(bp, irq_re_init);
@@ -4865,7 +4870,7 @@ static void bnxt_set_rx_mode(struct net_device *dev)
}
}
-static void bnxt_cfg_rx_mode(struct bnxt *bp)
+static int bnxt_cfg_rx_mode(struct bnxt *bp)
{
struct net_device *dev = bp->dev;
struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
@@ -4914,6 +4919,7 @@ static void bnxt_cfg_rx_mode(struct bnxt *bp)
netdev_err(bp->dev, "HWRM vnic filter failure rc: %x\n",
rc);
vnic->uc_filter_count = i;
+ return rc;
}
}
@@ -4922,6 +4928,8 @@ skip_uc:
if (rc)
netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n",
rc);
+
+ return rc;
}
static netdev_features_t bnxt_fix_features(struct net_device *dev,
@@ -5023,8 +5031,10 @@ static void bnxt_dbg_dump_states(struct bnxt *bp)
static void bnxt_reset_task(struct bnxt *bp)
{
bnxt_dbg_dump_states(bp);
- if (netif_running(bp->dev))
- bnxt_tx_disable(bp); /* prevent tx timout again */
+ if (netif_running(bp->dev)) {
+ bnxt_close_nic(bp, false, false);
+ bnxt_open_nic(bp, false, false);
+ }
}
static void bnxt_tx_timeout(struct net_device *dev)
@@ -5074,8 +5084,12 @@ static void bnxt_sp_task(struct work_struct *work)
struct bnxt *bp = container_of(work, struct bnxt, sp_task);
int rc;
- if (bp->state != BNXT_STATE_OPEN)
+ set_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
+ smp_mb__after_atomic();
+ if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
+ clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
return;
+ }
if (test_and_clear_bit(BNXT_RX_MASK_SP_EVENT, &bp->sp_event))
bnxt_cfg_rx_mode(bp);
@@ -5099,8 +5113,19 @@ static void bnxt_sp_task(struct work_struct *work)
bnxt_hwrm_tunnel_dst_port_free(
bp, TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN);
}
- if (test_and_clear_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event))
+ if (test_and_clear_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event)) {
+ /* bnxt_reset_task() calls bnxt_close_nic() which waits
+ * for BNXT_STATE_IN_SP_TASK to clear.
+ */
+ clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
+ rtnl_lock();
bnxt_reset_task(bp);
+ set_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
+ rtnl_unlock();
+ }
+
+ smp_mb__before_atomic();
+ clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
}
static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
@@ -5179,7 +5204,7 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
bp->timer.function = bnxt_timer;
bp->current_interval = BNXT_TIMER_INTERVAL;
- bp->state = BNXT_STATE_CLOSED;
+ clear_bit(BNXT_STATE_OPEN, &bp->state);
return 0;
@@ -5212,13 +5237,27 @@ init_err:
static int bnxt_change_mac_addr(struct net_device *dev, void *p)
{
struct sockaddr *addr = p;
+ struct bnxt *bp = netdev_priv(dev);
+ int rc = 0;
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
+#ifdef CONFIG_BNXT_SRIOV
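+ /* A VF may not override a MAC address assigned by the PF admin */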
+ if (BNXT_VF(bp) && is_valid_ether_addr(bp->vf.mac_addr))
+ return -EADDRNOTAVAIL;
+#endif
+
+ if (ether_addr_equal(addr->sa_data, dev->dev_addr))
+ return 0;
+
memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ if (netif_running(dev)) {
+ bnxt_close_nic(bp, false, false);
+ rc = bnxt_open_nic(bp, false, false);
+ }
- return 0;
+ return rc;
}
/* rtnl_lock held */
@@ -5686,15 +5725,12 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
bnxt_set_tpa_flags(bp);
bnxt_set_ring_params(bp);
dflt_rings = netif_get_num_default_rss_queues();
- if (BNXT_PF(bp)) {
- memcpy(dev->dev_addr, bp->pf.mac_addr, ETH_ALEN);
+ if (BNXT_PF(bp))
bp->pf.max_irqs = max_irqs;
- } else {
#if defined(CONFIG_BNXT_SRIOV)
- memcpy(dev->dev_addr, bp->vf.mac_addr, ETH_ALEN);
+ else
bp->vf.max_irqs = max_irqs;
#endif
- }
bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings);
bp->rx_nr_rings = min_t(int, dflt_rings, max_rx_rings);
bp->tx_nr_rings_per_tc = min_t(int, dflt_rings, max_tx_rings);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 674bc5159b91..f199f4cc8ffe 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -925,9 +925,9 @@ struct bnxt {
struct timer_list timer;
- int state;
-#define BNXT_STATE_CLOSED 0
-#define BNXT_STATE_OPEN 1
+ unsigned long state;
+#define BNXT_STATE_OPEN 0
+#define BNXT_STATE_IN_SP_TASK 1
struct bnxt_irq *irq_tbl;
u8 mac_addr[ETH_ALEN];
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index f4cf68861069..ea044bbcd384 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -21,7 +21,7 @@
#ifdef CONFIG_BNXT_SRIOV
static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id)
{
- if (bp->state != BNXT_STATE_OPEN) {
+ if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
netdev_err(bp->dev, "vf ndo called though PF is down\n");
return -EINVAL;
}
@@ -804,10 +804,9 @@ void bnxt_update_vf_mac(struct bnxt *bp)
if (!is_valid_ether_addr(resp->perm_mac_address))
goto update_vf_mac_exit;
- if (ether_addr_equal(resp->perm_mac_address, bp->vf.mac_addr))
- goto update_vf_mac_exit;
-
- memcpy(bp->vf.mac_addr, resp->perm_mac_address, ETH_ALEN);
+ if (!ether_addr_equal(resp->perm_mac_address, bp->vf.mac_addr))
+ memcpy(bp->vf.mac_addr, resp->perm_mac_address, ETH_ALEN);
+ /* overwrite netdev dev_addr with admin VF MAC */
memcpy(bp->dev->dev_addr, bp->vf.mac_addr, ETH_ALEN);
update_vf_mac_exit:
mutex_unlock(&bp->hwrm_cmd_lock);
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 88c1e1a834f8..169059c92f80 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -1682,6 +1682,8 @@ static void macb_init_hw(struct macb *bp)
macb_set_hwaddr(bp);
config = macb_mdc_clk_div(bp);
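+ /* Route the MAC through the internal PCS and enable SGMII mode
+ * when the PHY is attached over SGMII.
+ */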
+ if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII)
+ config |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL);
config |= MACB_BF(RBOF, NET_IP_ALIGN); /* Make eth data aligned */
config |= MACB_BIT(PAE); /* PAuse Enable */
config |= MACB_BIT(DRFCS); /* Discard Rx FCS */
@@ -2416,6 +2418,8 @@ static int macb_init(struct platform_device *pdev)
/* Set MII management clock divider */
val = macb_mdc_clk_div(bp);
val |= macb_dbw(bp);
+ if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII)
+ val |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL);
macb_writel(bp, NCFGR, val);
return 0;
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 6e1faea00ca8..d83b0db77821 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -215,12 +215,17 @@
/* GEM specific NCFGR bitfields. */
#define GEM_GBE_OFFSET 10 /* Gigabit mode enable */
#define GEM_GBE_SIZE 1
+#define GEM_PCSSEL_OFFSET 11
+#define GEM_PCSSEL_SIZE 1
#define GEM_CLK_OFFSET 18 /* MDC clock division */
#define GEM_CLK_SIZE 3
#define GEM_DBW_OFFSET 21 /* Data bus width */
#define GEM_DBW_SIZE 2
#define GEM_RXCOEN_OFFSET 24
#define GEM_RXCOEN_SIZE 1
+#define GEM_SGMIIEN_OFFSET 27
+#define GEM_SGMIIEN_SIZE 1
+
/* Constants for data bus width. */
#define GEM_DBW32 0 /* 32 bit AMBA AHB data bus width */
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index f683d97d7614..b89504405b72 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -560,7 +560,7 @@ static int liquidio_resume(struct pci_dev *pdev)
#endif
/* For PCI-E Advanced Error Recovery (AER) Interface */
-static struct pci_error_handlers liquidio_err_handler = {
+static const struct pci_error_handlers liquidio_err_handler = {
.error_detected = liquidio_pcie_error_detected,
.mmio_enabled = liquidio_pcie_mmio_enabled,
.slot_reset = liquidio_pcie_slot_reset,
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index d3950b20feb9..39ca6744a4e6 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -120,10 +120,9 @@
* Calculated for SCLK of 700Mhz
* value written should be a 1/16th of what is expected
*
- * 1 tick per 0.05usec = value of 2.2
- * This 10% would be covered in CQ timer thresh value
+ * 1 tick per 0.025usec
*/
-#define NICPF_CLK_PER_INT_TICK 2
+#define NICPF_CLK_PER_INT_TICK 1
/* Time to wait before we decide that a SQ is stuck.
*
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index c561fdcb79a7..5f24d11cb16a 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -615,6 +615,21 @@ static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk)
return 0;
}
+static void nic_enable_vf(struct nicpf *nic, int vf, bool enable)
+{
+ int bgx, lmac;
+
+ nic->vf_enabled[vf] = enable;
+
+ if (vf >= nic->num_vf_en)
+ return;
+
+ bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+ lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+ bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, enable);
+}
+
/* Interrupt handler to handle mailbox messages from VFs */
static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
{
@@ -714,14 +729,14 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
break;
case NIC_MBOX_MSG_CFG_DONE:
/* Last message of VF config msg sequence */
- nic->vf_enabled[vf] = true;
+ nic_enable_vf(nic, vf, true);
goto unlock;
case NIC_MBOX_MSG_SHUTDOWN:
/* First msg in VF teardown sequence */
- nic->vf_enabled[vf] = false;
if (vf >= nic->num_vf_en)
nic->sqs_used[vf - nic->num_vf_en] = false;
nic->pqs_vf[vf] = 0;
+ nic_enable_vf(nic, vf, false);
break;
case NIC_MBOX_MSG_ALLOC_SQS:
nic_alloc_sqs(nic, &mbx.sqs_alloc);
@@ -1074,8 +1089,7 @@ static void nic_remove(struct pci_dev *pdev)
if (nic->check_link) {
/* Destroy work Queue */
- cancel_delayed_work(&nic->dwork);
- flush_workqueue(nic->check_link);
+ cancel_delayed_work_sync(&nic->dwork);
destroy_workqueue(nic->check_link);
}
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index af54c10945c2..a12b2e38cf61 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -112,6 +112,13 @@ static int nicvf_get_settings(struct net_device *netdev,
cmd->supported = 0;
cmd->transceiver = XCVR_EXTERNAL;
+
+ if (!nic->link_up) {
+ cmd->duplex = DUPLEX_UNKNOWN;
+ ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
+ return 0;
+ }
+
if (nic->speed <= 1000) {
cmd->port = PORT_MII;
cmd->autoneg = AUTONEG_ENABLE;
@@ -125,6 +132,13 @@ static int nicvf_get_settings(struct net_device *netdev,
return 0;
}
+static u32 nicvf_get_link(struct net_device *netdev)
+{
+ struct nicvf *nic = netdev_priv(netdev);
+
+ return nic->link_up;
+}
+
static void nicvf_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *info)
{
@@ -660,7 +674,7 @@ static int nicvf_set_channels(struct net_device *dev,
static const struct ethtool_ops nicvf_ethtool_ops = {
.get_settings = nicvf_get_settings,
- .get_link = ethtool_op_get_link,
+ .get_link = nicvf_get_link,
.get_drvinfo = nicvf_get_drvinfo,
.get_msglevel = nicvf_get_msglevel,
.set_msglevel = nicvf_set_msglevel,
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index a9377727c11c..dde8dc720cd3 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1057,6 +1057,7 @@ int nicvf_stop(struct net_device *netdev)
netif_carrier_off(netdev);
netif_tx_stop_all_queues(nic->netdev);
+ nic->link_up = false;
/* Teardown secondary qsets first */
if (!nic->sqs_mode) {
@@ -1211,9 +1212,6 @@ int nicvf_open(struct net_device *netdev)
nic->drv_stats.txq_stop = 0;
nic->drv_stats.txq_wake = 0;
- netif_carrier_on(netdev);
- netif_tx_start_all_queues(netdev);
-
return 0;
cleanup:
nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
@@ -1583,8 +1581,14 @@ err_disable_device:
static void nicvf_remove(struct pci_dev *pdev)
{
struct net_device *netdev = pci_get_drvdata(pdev);
- struct nicvf *nic = netdev_priv(netdev);
- struct net_device *pnetdev = nic->pnicvf->netdev;
+ struct nicvf *nic;
+ struct net_device *pnetdev;
+
+ if (!netdev)
+ return;
+
+ nic = netdev_priv(netdev);
+ pnetdev = nic->pnicvf->netdev;
/* Check if this Qset is assigned to different VF.
* If yes, clean primary and all secondary Qsets.
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index e404ea837727..206b6a71a545 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -592,7 +592,7 @@ void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
/* Set threshold value for interrupt generation */
nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh);
nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2,
- qidx, nic->cq_coalesce_usecs);
+ qidx, CMP_QUEUE_TIMER_THRESH);
}
/* Configures transmit queue */
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index fb4957d09914..033e8306e91c 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -76,7 +76,7 @@
#define CMP_QSIZE CMP_QUEUE_SIZE2
#define CMP_QUEUE_LEN (1ULL << (CMP_QSIZE + 10))
#define CMP_QUEUE_CQE_THRESH 0
-#define CMP_QUEUE_TIMER_THRESH 220 /* 10usec */
+#define CMP_QUEUE_TIMER_THRESH 80 /* ~2usec */
#define RBDR_SIZE RBDR_SIZE0
#define RCV_BUF_COUNT (1ULL << (RBDR_SIZE + 13))
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 180aa9fabf48..9df26c2263bc 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -186,6 +186,23 @@ void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
}
EXPORT_SYMBOL(bgx_set_lmac_mac);
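+/* Gate packet rx/tx on an LMAC without disabling the link itself;
+ * this lets the PF quiesce traffic while a VF is torn down or set up.
+ */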
+void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
+{
+ struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+ u64 cfg;
+
+ if (!bgx)
+ return;
+
+ cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+ if (enable)
+ cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN;
+ else
+ cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN);
+ bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+}
+EXPORT_SYMBOL(bgx_lmac_rx_tx_enable);
+
static void bgx_sgmii_change_link_state(struct lmac *lmac)
{
struct bgx *bgx = lmac->bgx;
@@ -612,6 +629,8 @@ static void bgx_poll_for_link(struct work_struct *work)
lmac->last_duplex = 1;
} else {
lmac->link_up = 0;
+ lmac->last_speed = SPEED_UNKNOWN;
+ lmac->last_duplex = DUPLEX_UNKNOWN;
}
if (lmac->last_link != lmac->link_up) {
@@ -654,8 +673,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
}
/* Enable lmac */
- bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG,
- CMR_EN | CMR_PKT_RX_EN | CMR_PKT_TX_EN);
+ bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN);
/* Restore default cfg, incase low level firmware changed it */
bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, 0x03);
@@ -695,8 +713,7 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid)
lmac = &bgx->lmac[lmacid];
if (lmac->check_link) {
/* Destroy work queue */
- cancel_delayed_work(&lmac->dwork);
- flush_workqueue(lmac->check_link);
+ cancel_delayed_work_sync(&lmac->dwork);
destroy_workqueue(lmac->check_link);
}
@@ -1009,6 +1026,9 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
struct bgx *bgx = NULL;
u8 lmac;
+ /* Load octeon mdio driver */
+ octeon_mdiobus_force_mod_depencency();
+
bgx = devm_kzalloc(dev, sizeof(*bgx), GFP_KERNEL);
if (!bgx)
return -ENOMEM;
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 07b7ec66c60d..149e179363a1 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -182,6 +182,8 @@ enum MCAST_MODE {
#define BCAST_ACCEPT 1
#define CAM_ACCEPT 1
+void octeon_mdiobus_force_mod_depencency(void);
+void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable);
void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac);
unsigned bgx_get_map(int node);
int bgx_get_lmac_count(int node, int bgx);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
index c308429dd9c7..11dd91e4db56 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
@@ -295,6 +295,10 @@ struct clip_tbl *t4_init_clip_tbl(unsigned int clipt_start,
INIT_LIST_HEAD(&ctbl->hash_list[i]);
cl_list = t4_alloc_mem(clipt_size*sizeof(struct clip_entry));
+ if (!cl_list) {
+ t4_free_mem(ctbl);
+ return NULL;
+ }
ctbl->cl_list = (void *)cl_list;
for (i = 0; i < clipt_size; i++) {
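The fix adds the missing allocation check: when the second allocation fails, the table allocated before it must be freed before returning NULL. The same unwind pattern in isolation (names are illustrative):

        #include <linux/slab.h>

        struct two_tables {
                void *hash;
                void *entries;
        };

        static struct two_tables *two_tables_alloc(size_t hash_sz, size_t ent_sz)
        {
                struct two_tables *t = kzalloc(sizeof(*t), GFP_KERNEL);

                if (!t)
                        return NULL;
                t->hash = kzalloc(hash_sz, GFP_KERNEL);
                if (!t->hash)
                        goto free_t;
                t->entries = kzalloc(ent_sz, GFP_KERNEL);
                if (!t->entries)
                        goto free_hash;
                return t;

        free_hash:
                kfree(t->hash);
        free_t:
                kfree(t);
                return NULL;
        }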
diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index ed41559bae77..b553409e04ad 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -98,8 +98,7 @@ static int csr0 = 0x01A00000 | 0x4800;
#elif defined(__mips__)
static int csr0 = 0x00200000 | 0x4000;
#else
-#warning Processor architecture undefined!
-static int csr0 = 0x00A00000 | 0x4800;
+static int csr0;
#endif
/* Operational parameters that usually are not changed. */
@@ -1982,6 +1981,12 @@ static int __init tulip_init (void)
pr_info("%s", version);
#endif
+ if (!csr0) {
+ pr_warn("tulip: unknown CPU architecture, using default csr0\n");
+ /* default to 8 longword cache line alignment */
+ csr0 = 0x00A00000 | 0x4800;
+ }
+
/* copy module parms into globals */
tulip_rx_copybreak = rx_copybreak;
tulip_max_interrupt_work = max_interrupt_work;
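Instead of failing the build with #warning on unknown architectures, csr0 is now left zero-initialized and patched up at module init. The pattern in isolation (names are illustrative):

        #include <linux/printk.h>

        static int csr0_param; /* stays 0 on architectures with no tuned value */

        static void csr0_apply_fallback(void)
        {
                if (!csr0_param) {
                        pr_warn("unknown CPU architecture, using default csr0\n");
                        /* default to 8-longword cache line alignment */
                        csr0_param = 0x00A00000 | 0x4800;
                }
        }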
diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c
index 9beb3d34d4ba..3c0e4d5c5fef 100644
--- a/drivers/net/ethernet/dec/tulip/winbond-840.c
+++ b/drivers/net/ethernet/dec/tulip/winbond-840.c
@@ -907,7 +907,7 @@ static void init_registers(struct net_device *dev)
#elif defined(CONFIG_SPARC) || defined (CONFIG_PARISC) || defined(CONFIG_ARM)
i |= 0x4800;
#else
-#warning Processor architecture undefined
+ dev_warn(&dev->dev, "unknown CPU architecture, using default csr0 setting\n");
i |= 0x4800;
#endif
iowrite32(i, ioaddr + PCIBusCfg);
diff --git a/drivers/net/ethernet/dlink/Kconfig b/drivers/net/ethernet/dlink/Kconfig
index f6e858d0b9d4..ebdc83247bb6 100644
--- a/drivers/net/ethernet/dlink/Kconfig
+++ b/drivers/net/ethernet/dlink/Kconfig
@@ -17,15 +17,16 @@ config NET_VENDOR_DLINK
if NET_VENDOR_DLINK
config DL2K
- tristate "DL2000/TC902x-based Gigabit Ethernet support"
+ tristate "DL2000/TC902x/IP1000A-based Gigabit Ethernet support"
depends on PCI
select CRC32
---help---
- This driver supports DL2000/TC902x-based Gigabit ethernet cards,
+ This driver supports DL2000/TC902x/IP1000A-based Gigabit ethernet cards,
which includes
D-Link DGE-550T Gigabit Ethernet Adapter.
D-Link DL2000-based Gigabit Ethernet Adapter.
Sundance/Tamarack TC902x Gigabit Ethernet Adapter.
+ ICPlus IP1000A-based cards
To compile this driver as a module, choose M here: the
module will be called dl2k.
diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
index cf0a5fcdaaaf..ccca4799c27b 100644
--- a/drivers/net/ethernet/dlink/dl2k.c
+++ b/drivers/net/ethernet/dlink/dl2k.c
@@ -253,6 +253,19 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto err_out_unmap_rx;
+ if (np->chip_id == CHIP_IP1000A &&
+ (np->pdev->revision == 0x40 || np->pdev->revision == 0x41)) {
+ /* PHY magic taken from ipg driver, undocumented registers */
+ mii_write(dev, np->phy_addr, 31, 0x0001);
+ mii_write(dev, np->phy_addr, 27, 0x01e0);
+ mii_write(dev, np->phy_addr, 31, 0x0002);
+ mii_write(dev, np->phy_addr, 27, 0xeb8e);
+ mii_write(dev, np->phy_addr, 31, 0x0000);
+ mii_write(dev, np->phy_addr, 30, 0x005e);
+ /* advertise 1000BASE-T half & full duplex, prefer MASTER */
+ mii_write(dev, np->phy_addr, MII_CTRL1000, 0x0700);
+ }
+
/* Fiber device? */
np->phy_media = (dr16(ASICCtrl) & PhyMedia) ? 1 : 0;
np->link_status = 0;
@@ -361,6 +374,11 @@ parse_eeprom (struct net_device *dev)
for (i = 0; i < 6; i++)
dev->dev_addr[i] = psrom->mac_addr[i];
+ if (np->chip_id == CHIP_IP1000A) {
+ np->led_mode = psrom->led_mode;
+ return 0;
+ }
+
if (np->pdev->vendor != PCI_VENDOR_ID_DLINK) {
return 0;
}
@@ -406,6 +424,28 @@ parse_eeprom (struct net_device *dev)
return 0;
}
+static void rio_set_led_mode(struct net_device *dev)
+{
+ struct netdev_private *np = netdev_priv(dev);
+ void __iomem *ioaddr = np->ioaddr;
+ u32 mode;
+
+ if (np->chip_id != CHIP_IP1000A)
+ return;
+
+ mode = dr32(ASICCtrl);
+ mode &= ~(IPG_AC_LED_MODE_BIT_1 | IPG_AC_LED_MODE | IPG_AC_LED_SPEED);
+
+ if (np->led_mode & 0x01)
+ mode |= IPG_AC_LED_MODE;
+ if (np->led_mode & 0x02)
+ mode |= IPG_AC_LED_MODE_BIT_1;
+ if (np->led_mode & 0x08)
+ mode |= IPG_AC_LED_SPEED;
+
+ dw32(ASICCtrl, mode);
+}
+
static int
rio_open (struct net_device *dev)
{
@@ -424,6 +464,8 @@ rio_open (struct net_device *dev)
GlobalReset | DMAReset | FIFOReset | NetworkReset | HostReset);
mdelay(10);
+ rio_set_led_mode(dev);
+
/* DebugCtrl bit 4, 5, 9 must set */
dw32(DebugCtrl, dr32(DebugCtrl) | 0x0230);
@@ -433,9 +475,13 @@ rio_open (struct net_device *dev)
alloc_list (dev);
- /* Get station address */
- for (i = 0; i < 6; i++)
- dw8(StationAddr0 + i, dev->dev_addr[i]);
+ /* Set station address */
+ /* 16 or 32-bit access is required by TC9020 datasheet but 8-bit works
+ * too. However, it doesn't work on IP1000A so we use 16-bit access.
+ */
+ for (i = 0; i < 3; i++)
+ dw16(StationAddr0 + 2 * i,
+ cpu_to_le16(((u16 *)dev->dev_addr)[i]));
set_multicast (dev);
if (np->coalesce) {
@@ -780,6 +826,7 @@ tx_error (struct net_device *dev, int tx_status)
break;
mdelay (1);
}
+ rio_set_led_mode(dev);
rio_free_tx (dev, 1);
/* Reset TFDListPtr */
dw32(TFDListPtr0, np->tx_ring_dma +
@@ -799,6 +846,7 @@ tx_error (struct net_device *dev, int tx_status)
break;
mdelay (1);
}
+ rio_set_led_mode(dev);
/* Let TxStartThresh stay default value */
}
/* Maximum Collisions */
@@ -965,6 +1013,7 @@ rio_error (struct net_device *dev, int int_status)
dev->name, int_status);
dw16(ASICCtrl + 2, GlobalReset | HostReset);
mdelay (500);
+ rio_set_led_mode(dev);
}
}
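GlobalReset/HostReset restore ASICCtrl to its power-on value, which is why every reset path above re-applies the EEPROM LED mode. A hypothetical wrapper making that invariant explicit, assuming it lives in dl2k.c where the dw16() accessor and rio_set_led_mode() are in scope:

        static void rio_reset_and_restore_leds(struct net_device *dev,
                                               u16 reset_bits)
        {
                struct netdev_private *np = netdev_priv(dev);
                void __iomem *ioaddr = np->ioaddr;

                dw16(ASICCtrl + 2, reset_bits);
                mdelay(10);
                rio_set_led_mode(dev); /* no-op unless chip_id == CHIP_IP1000A */
        }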
diff --git a/drivers/net/ethernet/dlink/dl2k.h b/drivers/net/ethernet/dlink/dl2k.h
index 23c07b007069..8f4f61262d5c 100644
--- a/drivers/net/ethernet/dlink/dl2k.h
+++ b/drivers/net/ethernet/dlink/dl2k.h
@@ -211,6 +211,10 @@ enum ASICCtrl_HiWord_bits {
ResetBusy = 0x0400,
};
+#define IPG_AC_LED_MODE BIT(14)
+#define IPG_AC_LED_SPEED BIT(27)
+#define IPG_AC_LED_MODE_BIT_1 BIT(29)
+
/* Transmit Frame Control bits */
enum TFC_bits {
DwordAlign = 0x00000000,
@@ -332,7 +336,10 @@ typedef struct t_SROM {
u16 asic_ctrl; /* 0x02 */
u16 sub_vendor_id; /* 0x04 */
u16 sub_system_id; /* 0x06 */
- u16 reserved1[12]; /* 0x08-0x1f */
+ u16 pci_base_1; /* 0x08 (IP1000A only) */
+ u16 pci_base_2; /* 0x0a (IP1000A only) */
+ u16 led_mode; /* 0x0c (IP1000A only) */
+ u16 reserved1[9]; /* 0x0e-0x1f */
u8 mac_addr[6]; /* 0x20-0x25 */
u8 reserved2[10]; /* 0x26-0x2f */
u8 sib[204]; /* 0x30-0xfb */
@@ -397,6 +404,7 @@ struct netdev_private {
u16 advertising; /* NWay media advertisement */
u16 negotiate; /* Negotiated media */
int phy_addr; /* PHY addresses. */
+ u16 led_mode; /* LED mode read from EEPROM (IP1000A only) */
};
/* The station address location in the EEPROM. */
@@ -407,10 +415,15 @@ struct netdev_private {
class_mask of the class are honored during the comparison.
driver_data Data private to the driver.
*/
+#define CHIP_IP1000A 1
static const struct pci_device_id rio_pci_tbl[] = {
{0x1186, 0x4000, PCI_ANY_ID, PCI_ANY_ID, },
{0x13f0, 0x1021, PCI_ANY_ID, PCI_ANY_ID, },
+ { PCI_VDEVICE(SUNDANCE, 0x1023), CHIP_IP1000A },
+ { PCI_VDEVICE(SUNDANCE, 0x2021), CHIP_IP1000A },
+ { PCI_VDEVICE(DLINK, 0x9021), CHIP_IP1000A },
+ { PCI_VDEVICE(DLINK, 0x4020), CHIP_IP1000A },
{ }
};
MODULE_DEVICE_TABLE (pci, rio_pci_tbl);
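The driver_data field of each match entry reaches probe() through its pci_device_id argument, which is how the driver keys the IP1000A quirks above. A minimal sketch (helper name is illustrative):

        #include <linux/pci.h>

        /* True for entries tagged CHIP_IP1000A in rio_pci_tbl above. */
        static bool is_ip1000a(const struct pci_device_id *ent)
        {
                return ent->driver_data == CHIP_IP1000A;
        }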
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index d463563e1f70..6ee78c203eca 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -848,8 +848,6 @@ void be_roce_dev_remove(struct be_adapter *);
/*
* internal function to open-close roce device during ifup-ifdown.
*/
-void be_roce_dev_open(struct be_adapter *);
-void be_roce_dev_close(struct be_adapter *);
void be_roce_dev_shutdown(struct be_adapter *);
#endif /* BE_H */
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index f4cb8e425853..734f655c99c1 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -1062,9 +1062,7 @@ static int be_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
static int be_set_rss_hash_opts(struct be_adapter *adapter,
struct ethtool_rxnfc *cmd)
{
- struct be_rx_obj *rxo;
- int status = 0, i, j;
- u8 rsstable[128];
+ int status;
u32 rss_flags = adapter->rss_info.rss_flags;
if (cmd->data != L3_RSS_FLAGS &&
@@ -1113,20 +1111,11 @@ static int be_set_rss_hash_opts(struct be_adapter *adapter,
}
if (rss_flags == adapter->rss_info.rss_flags)
- return status;
-
- if (be_multi_rxq(adapter)) {
- for (j = 0; j < 128; j += adapter->num_rss_qs) {
- for_all_rss_queues(adapter, rxo, i) {
- if ((j + i) >= 128)
- break;
- rsstable[j + i] = rxo->rss_id;
- }
- }
- }
+ return 0;
status = be_cmd_rss_config(adapter, adapter->rss_info.rsstable,
- rss_flags, 128, adapter->rss_info.rss_hkey);
+ rss_flags, RSS_INDIR_TABLE_LEN,
+ adapter->rss_info.rss_hkey);
if (!status)
adapter->rss_info.rss_flags = rss_flags;
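The handler now reuses the indirection table cached in adapter->rss_info instead of rebuilding a local copy of a hard-coded 128 entries. For reference, the round-robin fill the removed code performed, in isolation (names are illustrative):

        #include <linux/types.h>

        /* Spread the RX queue ids evenly across the indirection table. */
        static void rss_fill_indirection(u8 *table, int table_len,
                                         const u8 *queue_ids, int nqueues)
        {
                int i;

                for (i = 0; i < table_len; i++)
                        table[i] = queue_ids[i % nqueues];
        }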
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index eb48a977f8da..8a1d9fffd7d6 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3299,8 +3299,10 @@ static int be_msix_register(struct be_adapter *adapter)
return 0;
err_msix:
- for (i--, eqo = &adapter->eq_obj[i]; i >= 0; i--, eqo--)
+ for (i--; i >= 0; i--) {
+ eqo = &adapter->eq_obj[i];
free_irq(be_msix_vec_get(adapter, eqo), eqo);
+ }
dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
status);
be_msix_disable(adapter);
@@ -3432,8 +3434,6 @@ static int be_close(struct net_device *netdev)
be_disable_if_filters(adapter);
- be_roce_dev_close(adapter);
-
if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
for_all_evt_queues(adapter, eqo, i) {
napi_disable(&eqo->napi);
@@ -3518,7 +3518,7 @@ static int be_rx_qs_create(struct be_adapter *adapter)
netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
- 128, rss_key);
+ RSS_INDIR_TABLE_LEN, rss_key);
if (rc) {
rss->rss_flags = RSS_ENABLE_NONE;
return rc;
@@ -3601,8 +3601,6 @@ static int be_open(struct net_device *netdev)
be_link_status_update(adapter, link_status);
netif_tx_start_all_queues(netdev);
- be_roce_dev_open(adapter);
-
#ifdef CONFIG_BE2NET_VXLAN
if (skyhawk_chip(adapter))
vxlan_get_rx_port(netdev);
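The err_msix fix recomputes the eq object inside the loop body rather than threading two induction variables through the for-clause, which walked the array incorrectly. The corrected unwind shape in isolation (names are illustrative):

        #include <linux/interrupt.h>

        /* Only vectors 0..requested-1 were successfully requested, so
         * walk back over exactly those and free each one.
         */
        static void unwind_requested_irqs(unsigned int *vecs, void **cookies,
                                          int requested)
        {
                int i;

                for (i = requested - 1; i >= 0; i--)
                        free_irq(vecs[i], cookies[i]);
        }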
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c
index 60368207bf58..4089156a7f5e 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.c
+++ b/drivers/net/ethernet/emulex/benet/be_roce.c
@@ -116,40 +116,6 @@ void be_roce_dev_remove(struct be_adapter *adapter)
}
}
-static void _be_roce_dev_open(struct be_adapter *adapter)
-{
- if (ocrdma_drv && adapter->ocrdma_dev &&
- ocrdma_drv->state_change_handler)
- ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
- BE_DEV_UP);
-}
-
-void be_roce_dev_open(struct be_adapter *adapter)
-{
- if (be_roce_supported(adapter)) {
- mutex_lock(&be_adapter_list_lock);
- _be_roce_dev_open(adapter);
- mutex_unlock(&be_adapter_list_lock);
- }
-}
-
-static void _be_roce_dev_close(struct be_adapter *adapter)
-{
- if (ocrdma_drv && adapter->ocrdma_dev &&
- ocrdma_drv->state_change_handler)
- ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
- BE_DEV_DOWN);
-}
-
-void be_roce_dev_close(struct be_adapter *adapter)
-{
- if (be_roce_supported(adapter)) {
- mutex_lock(&be_adapter_list_lock);
- _be_roce_dev_close(adapter);
- mutex_unlock(&be_adapter_list_lock);
- }
-}
-
void be_roce_dev_shutdown(struct be_adapter *adapter)
{
if (be_roce_supported(adapter)) {
@@ -177,8 +143,6 @@ int be_roce_register_driver(struct ocrdma_driver *drv)
_be_roce_dev_add(dev);
netdev = dev->netdev;
- if (netif_running(netdev) && netif_oper_up(netdev))
- _be_roce_dev_open(dev);
}
mutex_unlock(&be_adapter_list_lock);
return 0;
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h b/drivers/net/ethernet/emulex/benet/be_roce.h
index cde6ef905ec4..fde609789483 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.h
+++ b/drivers/net/ethernet/emulex/benet/be_roce.h
@@ -60,9 +60,7 @@ struct ocrdma_driver {
void (*state_change_handler) (struct ocrdma_dev *, u32 new_state);
};
-enum {
- BE_DEV_UP = 0,
- BE_DEV_DOWN = 1,
+enum be_roce_event {
BE_DEV_SHUTDOWN = 2
};
diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
index 63c2bcf8031a..b1026689b78f 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.c
+++ b/drivers/net/ethernet/ezchip/nps_enet.c
@@ -48,21 +48,15 @@ static void nps_enet_read_rx_fifo(struct net_device *ndev,
*reg = nps_enet_reg_get(priv, NPS_ENET_REG_RX_BUF);
else { /* !dst_is_aligned */
for (i = 0; i < len; i++, reg++) {
- u32 buf =
- nps_enet_reg_get(priv, NPS_ENET_REG_RX_BUF);
-
- /* to accommodate word-unaligned address of "reg"
- * we have to do memcpy_toio() instead of simple "=".
- */
- memcpy_toio((void __iomem *)reg, &buf, sizeof(buf));
+ u32 buf = nps_enet_reg_get(priv, NPS_ENET_REG_RX_BUF);
+ put_unaligned(buf, reg);
}
}
/* copy last bytes (if any) */
if (last) {
u32 buf = nps_enet_reg_get(priv, NPS_ENET_REG_RX_BUF);
-
- memcpy_toio((void __iomem *)reg, &buf, last);
+ memcpy((u8 *)reg, &buf, last);
}
}
@@ -367,7 +361,7 @@ static void nps_enet_send_frame(struct net_device *ndev,
struct nps_enet_tx_ctl tx_ctrl;
short length = skb->len;
u32 i, len = DIV_ROUND_UP(length, sizeof(u32));
- u32 *src = (u32 *)virt_to_phys(skb->data);
+ u32 *src = (void *)skb->data;
bool src_is_aligned = IS_ALIGNED((unsigned long)src, sizeof(u32));
tx_ctrl.value = 0;
@@ -375,17 +369,11 @@ static void nps_enet_send_frame(struct net_device *ndev,
if (src_is_aligned)
for (i = 0; i < len; i++, src++)
nps_enet_reg_set(priv, NPS_ENET_REG_TX_BUF, *src);
- else { /* !src_is_aligned */
- for (i = 0; i < len; i++, src++) {
- u32 buf;
-
- /* to accommodate word-unaligned address of "src"
- * we have to do memcpy_fromio() instead of simple "="
- */
- memcpy_fromio(&buf, (void __iomem *)src, sizeof(buf));
- nps_enet_reg_set(priv, NPS_ENET_REG_TX_BUF, buf);
- }
- }
+ else /* !src_is_aligned */
+ for (i = 0; i < len; i++, src++)
+ nps_enet_reg_set(priv, NPS_ENET_REG_TX_BUF,
+ get_unaligned(src));
+
/* Write the length of the Frame */
tx_ctrl.nt = length;
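skb->data is a plain virtual address (the old virt_to_phys() cast was wrong) and may be word-unaligned, so the driver now uses the generic unaligned accessors instead of abusing memcpy_toio()/memcpy_fromio() on non-MMIO memory. A minimal sketch of the accessors; note put_unaligned() takes (value, pointer) in that order (helper names are illustrative):

        #include <linux/types.h>
        #include <asm/unaligned.h>

        /* Store a register word at a possibly unaligned destination. */
        static inline void store_word(u32 val, u32 *dst)
        {
                put_unaligned(val, dst);
        }

        /* Fetch a word from a possibly unaligned source. */
        static inline u32 load_word(const u32 *src)
        {
                return get_unaligned(src);
        }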
diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index ff76d4e9dc1b..bee32a9d9876 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -7,7 +7,8 @@ config NET_VENDOR_FREESCALE
default y
depends on FSL_SOC || QUICC_ENGINE || CPM1 || CPM2 || PPC_MPC512x || \
M523x || M527x || M5272 || M528x || M520x || M532x || \
- ARCH_MXC || ARCH_MXS || (PPC_MPC52xx && PPC_BESTCOMM)
+ ARCH_MXC || ARCH_MXS || (PPC_MPC52xx && PPC_BESTCOMM) || \
+ ARCH_LAYERSCAPE
---help---
If you have a network (Ethernet) card belonging to this class, say Y.
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
index 08f5b911d96b..52e0091b4fb2 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
@@ -552,7 +552,7 @@ static void tx_restart(struct net_device *dev)
cbd_t __iomem *prev_bd;
cbd_t __iomem *last_tx_bd;
- last_tx_bd = fep->tx_bd_base + (fpi->tx_ring * sizeof(cbd_t));
+ last_tx_bd = fep->tx_bd_base + ((fpi->tx_ring - 1) * sizeof(cbd_t));
/* get the current bd held in TBPTR and scan back from this point */
recheck_bd = curr_tbptr = (cbd_t __iomem *)
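The fix is an off-by-one: the last descriptor of an n-entry ring sits at index n - 1, so the old expression pointed one element past the ring. This driver scales by sizeof(cbd_t) by hand; with a typed element pointer C does the scaling automatically, as in this minimal sketch (types are illustrative):

        #include <linux/types.h>

        struct ring_elem { u64 raw; }; /* stand-in for cbd_t */

        static inline struct ring_elem *ring_last(struct ring_elem *base,
                                                  unsigned int n)
        {
                return base + (n - 1); /* base + n is one past the end */
        }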
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
index 55c36230e176..40071dad1c57 100644
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
+++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
@@ -464,7 +464,7 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev)
* address). Print error message but continue anyway.
*/
if ((void *)tbipa > priv->map + resource_size(&res) - 4)
- dev_err(&pdev->dev, "invalid register map (should be at least 0x%04x to contain TBI address)\n",
+ dev_err(&pdev->dev, "invalid register map (should be at least 0x%04zx to contain TBI address)\n",
((void *)tbipa - priv->map) + 4);
iowrite32be(be32_to_cpup(prop), tbipa);
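The format fix: a pointer difference is pointer-sized, so it needs the z length modifier rather than a plain x that assumes int. A minimal sketch (names are illustrative):

        #include <linux/device.h>

        static void report_field_offset(struct device *dev, void *field,
                                        void *map)
        {
                /* The cast and %zx keep this correct on 64-bit kernels. */
                dev_err(dev, "field lives at offset 0x%04zx\n",
                        (size_t)(field - map));
        }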
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 3e6b9b437497..3e233d924cce 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -647,9 +647,9 @@ static int gfar_parse_group(struct device_node *np,
if (model && strcasecmp(model, "FEC")) {
gfar_irq(grp, RX)->irq = irq_of_parse_and_map(np, 1);
gfar_irq(grp, ER)->irq = irq_of_parse_and_map(np, 2);
- if (gfar_irq(grp, TX)->irq == NO_IRQ ||
- gfar_irq(grp, RX)->irq == NO_IRQ ||
- gfar_irq(grp, ER)->irq == NO_IRQ)
+ if (!gfar_irq(grp, TX)->irq ||
+ !gfar_irq(grp, RX)->irq ||
+ !gfar_irq(grp, ER)->irq)
return -EINVAL;
}
@@ -894,7 +894,8 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
FSL_GIANFAR_DEV_HAS_VLAN |
FSL_GIANFAR_DEV_HAS_MAGIC_PACKET |
FSL_GIANFAR_DEV_HAS_EXTENDED_HASH |
- FSL_GIANFAR_DEV_HAS_TIMER;
+ FSL_GIANFAR_DEV_HAS_TIMER |
+ FSL_GIANFAR_DEV_HAS_RX_FILER;
err = of_property_read_string(np, "phy-connection-type", &ctype);
@@ -1396,8 +1397,9 @@ static int gfar_probe(struct platform_device *ofdev)
priv->rx_queue[i]->rxic = DEFAULT_RXIC;
}
- /* always enable rx filer */
- priv->rx_filer_enable = 1;
+ /* Always enable rx filer if available */
+ priv->rx_filer_enable =
+ (priv->device_flags & FSL_GIANFAR_DEV_HAS_RX_FILER) ? 1 : 0;
/* Enable most messages by default */
priv->msg_enable = (NETIF_MSG_IFUP << 1 ) - 1;
/* use priority h/w tx queue scheduling for single queue devices */
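irq_of_parse_and_map() returns 0 when no mapping exists, so the portable failure test is !irq; NO_IRQ is -1 on some architectures and 0 on others, which is why it is being removed. A minimal sketch (helper name is illustrative):

        #include <linux/of_irq.h>

        static int get_required_irq(struct device_node *np, int index,
                                    unsigned int *irq_out)
        {
                unsigned int irq = irq_of_parse_and_map(np, index);

                if (!irq) /* 0 means "no mapping", unlike legacy NO_IRQ */
                        return -EINVAL;
                *irq_out = irq;
                return 0;
        }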
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index f266b20f9ef5..cb77667971a7 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -923,6 +923,7 @@ struct gfar {
#define FSL_GIANFAR_DEV_HAS_BUF_STASHING 0x00000400
#define FSL_GIANFAR_DEV_HAS_TIMER 0x00000800
#define FSL_GIANFAR_DEV_HAS_WAKE_ON_FILER 0x00001000
+#define FSL_GIANFAR_DEV_HAS_RX_FILER 0x00002000
#if (MAXGROUPS == 2)
#define DEFAULT_MAPPING 0xAA
diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c
index 664d0c261269..b40fba929d65 100644
--- a/drivers/net/ethernet/freescale/gianfar_ptp.c
+++ b/drivers/net/ethernet/freescale/gianfar_ptp.c
@@ -467,7 +467,7 @@ static int gianfar_ptp_probe(struct platform_device *dev)
etsects->irq = platform_get_irq(dev, 0);
- if (etsects->irq == NO_IRQ) {
+ if (etsects->irq < 0) {
pr_err("irq not in device tree\n");
goto no_node;
}
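platform_get_irq() is different: it returns a negative errno on failure and never NO_IRQ, so the correct test is irq < 0. A minimal sketch (helper name is illustrative):

        #include <linux/platform_device.h>

        static int ptp_get_irq(struct platform_device *pdev, int *irq_out)
        {
                int irq = platform_get_irq(pdev, 0);

                if (irq < 0)
                        return irq; /* e.g. -ENXIO; propagate to the caller */
                *irq_out = irq;
                return 0;
        }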
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 2a98eba660c0..b674414a4d72 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -1259,12 +1259,8 @@ int hns_dsaf_set_mac_uc_entry(
if (MAC_IS_ALL_ZEROS(mac_entry->addr) ||
MAC_IS_BROADCAST(mac_entry->addr) ||
MAC_IS_MULTICAST(mac_entry->addr)) {
- dev_err(dsaf_dev->dev,
- "set_uc %s Mac %02x:%02x:%02x:%02x:%02x:%02x err!\n",
- dsaf_dev->ae_dev.name, mac_entry->addr[0],
- mac_entry->addr[1], mac_entry->addr[2],
- mac_entry->addr[3], mac_entry->addr[4],
- mac_entry->addr[5]);
+ dev_err(dsaf_dev->dev, "set_uc %s Mac %pM err!\n",
+ dsaf_dev->ae_dev.name, mac_entry->addr);
return -EINVAL;
}
@@ -1331,12 +1327,8 @@ int hns_dsaf_set_mac_mc_entry(
/* mac addr check */
if (MAC_IS_ALL_ZEROS(mac_entry->addr)) {
- dev_err(dsaf_dev->dev,
- "set uc %s Mac %02x:%02x:%02x:%02x:%02x:%02x err!\n",
- dsaf_dev->ae_dev.name, mac_entry->addr[0],
- mac_entry->addr[1], mac_entry->addr[2],
- mac_entry->addr[3],
- mac_entry->addr[4], mac_entry->addr[5]);
+ dev_err(dsaf_dev->dev, "set uc %s Mac %pM err!\n",
+ dsaf_dev->ae_dev.name, mac_entry->addr);
return -EINVAL;
}
@@ -1410,11 +1402,8 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
/* check mac addr */
if (MAC_IS_ALL_ZEROS(mac_entry->addr)) {
- dev_err(dsaf_dev->dev,
- "set_entry failed,addr %02x:%02x:%02x:%02x:%02x:%02x!\n",
- mac_entry->addr[0], mac_entry->addr[1],
- mac_entry->addr[2], mac_entry->addr[3],
- mac_entry->addr[4], mac_entry->addr[5]);
+ dev_err(dsaf_dev->dev, "set_entry failed,addr %pM!\n",
+ mac_entry->addr);
return -EINVAL;
}
@@ -1497,9 +1486,8 @@ int hns_dsaf_del_mac_entry(struct dsaf_device *dsaf_dev, u16 vlan_id,
/*check mac addr */
if (MAC_IS_ALL_ZEROS(addr) || MAC_IS_BROADCAST(addr)) {
- dev_err(dsaf_dev->dev,
- "del_entry failed,addr %02x:%02x:%02x:%02x:%02x:%02x!\n",
- addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+ dev_err(dsaf_dev->dev, "del_entry failed,addr %pM!\n",
+ addr);
return -EINVAL;
}
@@ -1563,11 +1551,8 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
/*check mac addr */
if (MAC_IS_ALL_ZEROS(mac_entry->addr)) {
- dev_err(dsaf_dev->dev,
- "del_port failed, addr %02x:%02x:%02x:%02x:%02x:%02x!\n",
- mac_entry->addr[0], mac_entry->addr[1],
- mac_entry->addr[2], mac_entry->addr[3],
- mac_entry->addr[4], mac_entry->addr[5]);
+ dev_err(dsaf_dev->dev, "del_port failed, addr %pM!\n",
+ mac_entry->addr);
return -EINVAL;
}
@@ -1644,11 +1629,8 @@ int hns_dsaf_get_mac_uc_entry(struct dsaf_device *dsaf_dev,
/* check macaddr */
if (MAC_IS_ALL_ZEROS(mac_entry->addr) ||
MAC_IS_BROADCAST(mac_entry->addr)) {
- dev_err(dsaf_dev->dev,
- "get_entry failed,addr %02x:%02x:%02x:%02x:%02x:%02x\n",
- mac_entry->addr[0], mac_entry->addr[1],
- mac_entry->addr[2], mac_entry->addr[3],
- mac_entry->addr[4], mac_entry->addr[5]);
+ dev_err(dsaf_dev->dev, "get_entry failed,addr %pM\n",
+ mac_entry->addr);
return -EINVAL;
}
@@ -1695,11 +1677,8 @@ int hns_dsaf_get_mac_mc_entry(struct dsaf_device *dsaf_dev,
/*check mac addr */
if (MAC_IS_ALL_ZEROS(mac_entry->addr) ||
MAC_IS_BROADCAST(mac_entry->addr)) {
- dev_err(dsaf_dev->dev,
- "get_entry failed,addr %02x:%02x:%02x:%02x:%02x:%02x\n",
- mac_entry->addr[0], mac_entry->addr[1],
- mac_entry->addr[2], mac_entry->addr[3],
- mac_entry->addr[4], mac_entry->addr[5]);
+ dev_err(dsaf_dev->dev, "get_entry failed,addr %pM\n",
+ mac_entry->addr);
return -EINVAL;
}
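All six hand-written %02x conversions collapse into the kernel's %pM printk extension, which formats a 6-byte MAC address as xx:xx:xx:xx:xx:xx. In isolation (helper name is illustrative):

        #include <linux/device.h>

        static void report_bad_mac(struct device *dev, const u8 *addr)
        {
                dev_err(dev, "invalid MAC address %pM\n", addr);
        }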
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index b475e1bf2e6f..bdbd80423b17 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -898,7 +898,7 @@
#define XGMAC_PAUSE_CTL_RSP_MODE_B 2
#define XGMAC_PAUSE_CTL_TX_XOFF_B 3
-static inline void dsaf_write_reg(void *base, u32 reg, u32 value)
+static inline void dsaf_write_reg(void __iomem *base, u32 reg, u32 value)
{
u8 __iomem *reg_addr = ACCESS_ONCE(base);
@@ -908,7 +908,7 @@ static inline void dsaf_write_reg(void *base, u32 reg, u32 value)
#define dsaf_write_dev(a, reg, value) \
dsaf_write_reg((a)->io_base, (reg), (value))
-static inline u32 dsaf_read_reg(u8 *base, u32 reg)
+static inline u32 dsaf_read_reg(u8 __iomem *base, u32 reg)
{
u8 __iomem *reg_addr = ACCESS_ONCE(base);
@@ -927,8 +927,8 @@ static inline u32 dsaf_read_reg(u8 *base, u32 reg)
#define dsaf_set_bit(origin, shift, val) \
dsaf_set_field((origin), (1ull << (shift)), (shift), (val))
-static inline void dsaf_set_reg_field(void *base, u32 reg, u32 mask, u32 shift,
- u32 val)
+static inline void dsaf_set_reg_field(void __iomem *base, u32 reg, u32 mask,
+ u32 shift, u32 val)
{
u32 origin = dsaf_read_reg(base, reg);
@@ -947,7 +947,8 @@ static inline void dsaf_set_reg_field(void *base, u32 reg, u32 mask, u32 shift,
#define dsaf_get_bit(origin, shift) \
dsaf_get_field((origin), (1ull << (shift)), (shift))
-static inline u32 dsaf_get_reg_field(void *base, u32 reg, u32 mask, u32 shift)
+static inline u32 dsaf_get_reg_field(void __iomem *base, u32 reg, u32 mask,
+ u32 shift)
{
u32 origin;
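Annotating the register base as void __iomem * end to end lets sparse flag plain dereferences of MMIO pointers; access then funnels through the io accessors only. A minimal sketch (helper names are illustrative):

        #include <linux/io.h>

        static inline u32 mmio_read(void __iomem *base, u32 off)
        {
                return readl(base + off);
        }

        static inline void mmio_write(void __iomem *base, u32 off, u32 val)
        {
                writel(val, base + off);
        }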
diff --git a/drivers/net/ethernet/icplus/Kconfig b/drivers/net/ethernet/icplus/Kconfig
deleted file mode 100644
index 14a66e9d2e26..000000000000
--- a/drivers/net/ethernet/icplus/Kconfig
+++ /dev/null
@@ -1,13 +0,0 @@
-#
-# IC Plus device configuration
-#
-
-config IP1000
- tristate "IP1000 Gigabit Ethernet support"
- depends on PCI
- select MII
- ---help---
- This driver supports IP1000 gigabit Ethernet cards.
-
- To compile this driver as a module, choose M here: the module
- will be called ipg. This is recommended.
diff --git a/drivers/net/ethernet/icplus/Makefile b/drivers/net/ethernet/icplus/Makefile
deleted file mode 100644
index 5bc87c1f36aa..000000000000
--- a/drivers/net/ethernet/icplus/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the IC Plus device drivers
-#
-
-obj-$(CONFIG_IP1000) += ipg.o
diff --git a/drivers/net/ethernet/icplus/ipg.c b/drivers/net/ethernet/icplus/ipg.c
deleted file mode 100644
index c3b6af83f070..000000000000
--- a/drivers/net/ethernet/icplus/ipg.c
+++ /dev/null
@@ -1,2300 +0,0 @@
-/*
- * ipg.c: Device Driver for the IP1000 Gigabit Ethernet Adapter
- *
- * Copyright (C) 2003, 2007 IC Plus Corp
- *
- * Original Author:
- *
- * Craig Rich
- * Sundance Technology, Inc.
- * www.sundanceti.com
- * craig_rich@sundanceti.com
- *
- * Current Maintainer:
- *
- * Sorbica Shieh.
- * http://www.icplus.com.tw
- * sorbica@icplus.com.tw
- *
- * Jesse Huang
- * http://www.icplus.com.tw
- * jesse@icplus.com.tw
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/crc32.h>
-#include <linux/ethtool.h>
-#include <linux/interrupt.h>
-#include <linux/gfp.h>
-#include <linux/mii.h>
-#include <linux/mutex.h>
-
-#include <asm/div64.h>
-
-#define IPG_RX_RING_BYTES (sizeof(struct ipg_rx) * IPG_RFDLIST_LENGTH)
-#define IPG_TX_RING_BYTES (sizeof(struct ipg_tx) * IPG_TFDLIST_LENGTH)
-#define IPG_RESET_MASK \
- (IPG_AC_GLOBAL_RESET | IPG_AC_RX_RESET | IPG_AC_TX_RESET | \
- IPG_AC_DMA | IPG_AC_FIFO | IPG_AC_NETWORK | IPG_AC_HOST | \
- IPG_AC_AUTO_INIT)
-
-#define ipg_w32(val32, reg) iowrite32((val32), ioaddr + (reg))
-#define ipg_w16(val16, reg) iowrite16((val16), ioaddr + (reg))
-#define ipg_w8(val8, reg) iowrite8((val8), ioaddr + (reg))
-
-#define ipg_r32(reg) ioread32(ioaddr + (reg))
-#define ipg_r16(reg) ioread16(ioaddr + (reg))
-#define ipg_r8(reg) ioread8(ioaddr + (reg))
-
-enum {
- netdev_io_size = 128
-};
-
-#include "ipg.h"
-#define DRV_NAME "ipg"
-
-MODULE_AUTHOR("IC Plus Corp. 2003");
-MODULE_DESCRIPTION("IC Plus IP1000 Gigabit Ethernet Adapter Linux Driver");
-MODULE_LICENSE("GPL");
-
-/*
- * Defaults
- */
-#define IPG_MAX_RXFRAME_SIZE 0x0600
-#define IPG_RXFRAG_SIZE 0x0600
-#define IPG_RXSUPPORT_SIZE 0x0600
-#define IPG_IS_JUMBO false
-
-/*
- * Variable record -- index by leading revision/length
- * Revision/Length(=N*4), Address1, Data1, Address2, Data2,...,AddressN,DataN
- */
-static const unsigned short DefaultPhyParam[] = {
- /* 11/12/03 IP1000A v1-3 rev=0x40 */
- /*--------------------------------------------------------------------------
- (0x4000|(15*4)), 31, 0x0001, 27, 0x01e0, 31, 0x0002, 22, 0x85bd, 24, 0xfff2,
- 27, 0x0c10, 28, 0x0c10, 29, 0x2c10, 31, 0x0003, 23, 0x92f6,
- 31, 0x0000, 23, 0x003d, 30, 0x00de, 20, 0x20e7, 9, 0x0700,
- --------------------------------------------------------------------------*/
- /* 12/17/03 IP1000A v1-4 rev=0x40 */
- (0x4000 | (07 * 4)), 31, 0x0001, 27, 0x01e0, 31, 0x0002, 27, 0xeb8e, 31,
- 0x0000,
- 30, 0x005e, 9, 0x0700,
- /* 01/09/04 IP1000A v1-5 rev=0x41 */
- (0x4100 | (07 * 4)), 31, 0x0001, 27, 0x01e0, 31, 0x0002, 27, 0xeb8e, 31,
- 0x0000,
- 30, 0x005e, 9, 0x0700,
- 0x0000
-};
-
-static const char * const ipg_brand_name[] = {
- "IC PLUS IP1000 1000/100/10 based NIC",
- "Sundance Technology ST2021 based NIC",
- "Tamarack Microelectronics TC9020/9021 based NIC",
- "D-Link NIC IP1000A"
-};
-
-static const struct pci_device_id ipg_pci_tbl[] = {
- { PCI_VDEVICE(SUNDANCE, 0x1023), 0 },
- { PCI_VDEVICE(SUNDANCE, 0x2021), 1 },
- { PCI_VDEVICE(DLINK, 0x9021), 2 },
- { PCI_VDEVICE(DLINK, 0x4020), 3 },
- { 0, }
-};
-
-MODULE_DEVICE_TABLE(pci, ipg_pci_tbl);
-
-static inline void __iomem *ipg_ioaddr(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- return sp->ioaddr;
-}
-
-#ifdef IPG_DEBUG
-static void ipg_dump_rfdlist(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = sp->ioaddr;
- unsigned int i;
- u32 offset;
-
- IPG_DEBUG_MSG("_dump_rfdlist\n");
-
- netdev_info(dev, "rx_current = %02x\n", sp->rx_current);
- netdev_info(dev, "rx_dirty = %02x\n", sp->rx_dirty);
- netdev_info(dev, "RFDList start address = %016lx\n",
- (unsigned long)sp->rxd_map);
- netdev_info(dev, "RFDListPtr register = %08x%08x\n",
- ipg_r32(IPG_RFDLISTPTR1), ipg_r32(IPG_RFDLISTPTR0));
-
- for (i = 0; i < IPG_RFDLIST_LENGTH; i++) {
- offset = (u32) &sp->rxd[i].next_desc - (u32) sp->rxd;
- netdev_info(dev, "%02x %04x RFDNextPtr = %016lx\n",
- i, offset, (unsigned long)sp->rxd[i].next_desc);
- offset = (u32) &sp->rxd[i].rfs - (u32) sp->rxd;
- netdev_info(dev, "%02x %04x RFS = %016lx\n",
- i, offset, (unsigned long)sp->rxd[i].rfs);
- offset = (u32) &sp->rxd[i].frag_info - (u32) sp->rxd;
- netdev_info(dev, "%02x %04x frag_info = %016lx\n",
- i, offset, (unsigned long)sp->rxd[i].frag_info);
- }
-}
-
-static void ipg_dump_tfdlist(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = sp->ioaddr;
- unsigned int i;
- u32 offset;
-
- IPG_DEBUG_MSG("_dump_tfdlist\n");
-
- netdev_info(dev, "tx_current = %02x\n", sp->tx_current);
- netdev_info(dev, "tx_dirty = %02x\n", sp->tx_dirty);
- netdev_info(dev, "TFDList start address = %016lx\n",
- (unsigned long) sp->txd_map);
- netdev_info(dev, "TFDListPtr register = %08x%08x\n",
- ipg_r32(IPG_TFDLISTPTR1), ipg_r32(IPG_TFDLISTPTR0));
-
- for (i = 0; i < IPG_TFDLIST_LENGTH; i++) {
- offset = (u32) &sp->txd[i].next_desc - (u32) sp->txd;
- netdev_info(dev, "%02x %04x TFDNextPtr = %016lx\n",
- i, offset, (unsigned long)sp->txd[i].next_desc);
-
- offset = (u32) &sp->txd[i].tfc - (u32) sp->txd;
- netdev_info(dev, "%02x %04x TFC = %016lx\n",
- i, offset, (unsigned long) sp->txd[i].tfc);
- offset = (u32) &sp->txd[i].frag_info - (u32) sp->txd;
- netdev_info(dev, "%02x %04x frag_info = %016lx\n",
- i, offset, (unsigned long) sp->txd[i].frag_info);
- }
-}
-#endif
-
-static void ipg_write_phy_ctl(void __iomem *ioaddr, u8 data)
-{
- ipg_w8(IPG_PC_RSVD_MASK & data, PHY_CTRL);
- ndelay(IPG_PC_PHYCTRLWAIT_NS);
-}
-
-static void ipg_drive_phy_ctl_low_high(void __iomem *ioaddr, u8 data)
-{
- ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_LO | data);
- ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_HI | data);
-}
-
-static void send_three_state(void __iomem *ioaddr, u8 phyctrlpolarity)
-{
- phyctrlpolarity |= (IPG_PC_MGMTDATA & 0) | IPG_PC_MGMTDIR;
-
- ipg_drive_phy_ctl_low_high(ioaddr, phyctrlpolarity);
-}
-
-static void send_end(void __iomem *ioaddr, u8 phyctrlpolarity)
-{
- ipg_w8((IPG_PC_MGMTCLK_LO | (IPG_PC_MGMTDATA & 0) | IPG_PC_MGMTDIR |
- phyctrlpolarity) & IPG_PC_RSVD_MASK, PHY_CTRL);
-}
-
-static u16 read_phy_bit(void __iomem *ioaddr, u8 phyctrlpolarity)
-{
- u16 bit_data;
-
- ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_LO | phyctrlpolarity);
-
- bit_data = ((ipg_r8(PHY_CTRL) & IPG_PC_MGMTDATA) >> 1) & 1;
-
- ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_HI | phyctrlpolarity);
-
- return bit_data;
-}
-
-/*
- * Read a register from the Physical Layer device located
- * on the IPG NIC, using the IPG PHYCTRL register.
- */
-static int mdio_read(struct net_device *dev, int phy_id, int phy_reg)
-{
- void __iomem *ioaddr = ipg_ioaddr(dev);
- /*
- * The GMII mangement frame structure for a read is as follows:
- *
- * |Preamble|st|op|phyad|regad|ta| data |idle|
- * |< 32 1s>|01|10|AAAAA|RRRRR|z0|DDDDDDDDDDDDDDDD|z |
- *
- * <32 1s> = 32 consecutive logic 1 values
- * A = bit of Physical Layer device address (MSB first)
- * R = bit of register address (MSB first)
- * z = High impedance state
- * D = bit of read data (MSB first)
- *
- * Transmission order is 'Preamble' field first, bits transmitted
- * left to right (first to last).
- */
- struct {
- u32 field;
- unsigned int len;
- } p[] = {
- { GMII_PREAMBLE, 32 }, /* Preamble */
- { GMII_ST, 2 }, /* ST */
- { GMII_READ, 2 }, /* OP */
- { phy_id, 5 }, /* PHYAD */
- { phy_reg, 5 }, /* REGAD */
- { 0x0000, 2 }, /* TA */
- { 0x0000, 16 }, /* DATA */
- { 0x0000, 1 } /* IDLE */
- };
- unsigned int i, j;
- u8 polarity, data;
-
- polarity = ipg_r8(PHY_CTRL);
- polarity &= (IPG_PC_DUPLEX_POLARITY | IPG_PC_LINK_POLARITY);
-
- /* Create the Preamble, ST, OP, PHYAD, and REGAD field. */
- for (j = 0; j < 5; j++) {
- for (i = 0; i < p[j].len; i++) {
- /* For each variable length field, the MSB must be
- * transmitted first. Rotate through the field bits,
- * starting with the MSB, and move each bit into the
- * the 1st (2^1) bit position (this is the bit position
- * corresponding to the MgmtData bit of the PhyCtrl
- * register for the IPG).
- *
- * Example: ST = 01;
- *
- * First write a '0' to bit 1 of the PhyCtrl
- * register, then write a '1' to bit 1 of the
- * PhyCtrl register.
- *
- * To do this, right shift the MSB of ST by the value:
- * [field length - 1 - #ST bits already written]
- * then left shift this result by 1.
- */
- data = (p[j].field >> (p[j].len - 1 - i)) << 1;
- data &= IPG_PC_MGMTDATA;
- data |= polarity | IPG_PC_MGMTDIR;
-
- ipg_drive_phy_ctl_low_high(ioaddr, data);
- }
- }
-
- send_three_state(ioaddr, polarity);
-
- read_phy_bit(ioaddr, polarity);
-
- /*
- * For a read cycle, the bits for the next two fields (TA and
- * DATA) are driven by the PHY (the IPG reads these bits).
- */
- for (i = 0; i < p[6].len; i++) {
- p[6].field |=
- (read_phy_bit(ioaddr, polarity) << (p[6].len - 1 - i));
- }
-
- send_three_state(ioaddr, polarity);
- send_three_state(ioaddr, polarity);
- send_three_state(ioaddr, polarity);
- send_end(ioaddr, polarity);
-
- /* Return the value of the DATA field. */
- return p[6].field;
-}
-
-/*
- * Write to a register from the Physical Layer device located
- * on the IPG NIC, using the IPG PHYCTRL register.
- */
-static void mdio_write(struct net_device *dev, int phy_id, int phy_reg, int val)
-{
- void __iomem *ioaddr = ipg_ioaddr(dev);
- /*
- * The GMII mangement frame structure for a read is as follows:
- *
- * |Preamble|st|op|phyad|regad|ta| data |idle|
- * |< 32 1s>|01|10|AAAAA|RRRRR|z0|DDDDDDDDDDDDDDDD|z |
- *
- * <32 1s> = 32 consecutive logic 1 values
- * A = bit of Physical Layer device address (MSB first)
- * R = bit of register address (MSB first)
- * z = High impedance state
- * D = bit of write data (MSB first)
- *
- * Transmission order is 'Preamble' field first, bits transmitted
- * left to right (first to last).
- */
- struct {
- u32 field;
- unsigned int len;
- } p[] = {
- { GMII_PREAMBLE, 32 }, /* Preamble */
- { GMII_ST, 2 }, /* ST */
- { GMII_WRITE, 2 }, /* OP */
- { phy_id, 5 }, /* PHYAD */
- { phy_reg, 5 }, /* REGAD */
- { 0x0002, 2 }, /* TA */
- { val & 0xffff, 16 }, /* DATA */
- { 0x0000, 1 } /* IDLE */
- };
- unsigned int i, j;
- u8 polarity, data;
-
- polarity = ipg_r8(PHY_CTRL);
- polarity &= (IPG_PC_DUPLEX_POLARITY | IPG_PC_LINK_POLARITY);
-
- /* Create the Preamble, ST, OP, PHYAD, and REGAD field. */
- for (j = 0; j < 7; j++) {
- for (i = 0; i < p[j].len; i++) {
- /* For each variable length field, the MSB must be
- * transmitted first. Rotate through the field bits,
- * starting with the MSB, and move each bit into the
- * the 1st (2^1) bit position (this is the bit position
- * corresponding to the MgmtData bit of the PhyCtrl
- * register for the IPG).
- *
- * Example: ST = 01;
- *
- * First write a '0' to bit 1 of the PhyCtrl
- * register, then write a '1' to bit 1 of the
- * PhyCtrl register.
- *
- * To do this, right shift the MSB of ST by the value:
- * [field length - 1 - #ST bits already written]
- * then left shift this result by 1.
- */
- data = (p[j].field >> (p[j].len - 1 - i)) << 1;
- data &= IPG_PC_MGMTDATA;
- data |= polarity | IPG_PC_MGMTDIR;
-
- ipg_drive_phy_ctl_low_high(ioaddr, data);
- }
- }
-
- /* The last cycle is a tri-state, so read from the PHY. */
- ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_LO | polarity);
- ipg_r8(PHY_CTRL);
- ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_HI | polarity);
-}
-
-static void ipg_set_led_mode(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = sp->ioaddr;
- u32 mode;
-
- mode = ipg_r32(ASIC_CTRL);
- mode &= ~(IPG_AC_LED_MODE_BIT_1 | IPG_AC_LED_MODE | IPG_AC_LED_SPEED);
-
- if ((sp->led_mode & 0x03) > 1)
- mode |= IPG_AC_LED_MODE_BIT_1; /* Write Asic Control Bit 29 */
-
- if ((sp->led_mode & 0x01) == 1)
- mode |= IPG_AC_LED_MODE; /* Write Asic Control Bit 14 */
-
- if ((sp->led_mode & 0x08) == 8)
- mode |= IPG_AC_LED_SPEED; /* Write Asic Control Bit 27 */
-
- ipg_w32(mode, ASIC_CTRL);
-}
-
-static void ipg_set_phy_set(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = sp->ioaddr;
- int physet;
-
- physet = ipg_r8(PHY_SET);
- physet &= ~(IPG_PS_MEM_LENB9B | IPG_PS_MEM_LEN9 | IPG_PS_NON_COMPDET);
- physet |= ((sp->led_mode & 0x70) >> 4);
- ipg_w8(physet, PHY_SET);
-}
-
-static int ipg_reset(struct net_device *dev, u32 resetflags)
-{
- /* Assert functional resets via the IPG AsicCtrl
- * register as specified by the 'resetflags' input
- * parameter.
- */
- void __iomem *ioaddr = ipg_ioaddr(dev);
- unsigned int timeout_count = 0;
-
- IPG_DEBUG_MSG("_reset\n");
-
- ipg_w32(ipg_r32(ASIC_CTRL) | resetflags, ASIC_CTRL);
-
- /* Delay added to account for problem with 10Mbps reset. */
- mdelay(IPG_AC_RESETWAIT);
-
- while (IPG_AC_RESET_BUSY & ipg_r32(ASIC_CTRL)) {
- mdelay(IPG_AC_RESETWAIT);
- if (++timeout_count > IPG_AC_RESET_TIMEOUT)
- return -ETIME;
- }
- /* Set LED Mode in Asic Control */
- ipg_set_led_mode(dev);
-
- /* Set PHYSet Register Value */
- ipg_set_phy_set(dev);
- return 0;
-}
-
-/* Find the GMII PHY address. */
-static int ipg_find_phyaddr(struct net_device *dev)
-{
- unsigned int phyaddr, i;
-
- for (i = 0; i < 32; i++) {
- u32 status;
-
- /* Search for the correct PHY address among 32 possible. */
- phyaddr = (IPG_NIC_PHY_ADDRESS + i) % 32;
-
- /* 10/22/03 Grace change verify from GMII_PHY_STATUS to
- GMII_PHY_ID1
- */
-
- status = mdio_read(dev, phyaddr, MII_BMSR);
-
- if ((status != 0xFFFF) && (status != 0))
- return phyaddr;
- }
-
- return 0x1f;
-}
-
-/*
- * Configure IPG based on result of IEEE 802.3 PHY
- * auto-negotiation.
- */
-static int ipg_config_autoneg(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = sp->ioaddr;
- unsigned int txflowcontrol;
- unsigned int rxflowcontrol;
- unsigned int fullduplex;
- u32 mac_ctrl_val;
- u32 asicctrl;
- u8 phyctrl;
- const char *speed;
- const char *duplex;
- const char *tx_desc;
- const char *rx_desc;
-
- IPG_DEBUG_MSG("_config_autoneg\n");
-
- asicctrl = ipg_r32(ASIC_CTRL);
- phyctrl = ipg_r8(PHY_CTRL);
- mac_ctrl_val = ipg_r32(MAC_CTRL);
-
- /* Set flags for use in resolving auto-negotiation, assuming
- * non-1000Mbps, half duplex, no flow control.
- */
- fullduplex = 0;
- txflowcontrol = 0;
- rxflowcontrol = 0;
-
- /* To accommodate a problem in 10Mbps operation,
- * set a global flag if PHY running in 10Mbps mode.
- */
- sp->tenmbpsmode = 0;
-
- /* Determine actual speed of operation. */
- switch (phyctrl & IPG_PC_LINK_SPEED) {
- case IPG_PC_LINK_SPEED_10MBPS:
- speed = "10Mbps";
- sp->tenmbpsmode = 1;
- break;
- case IPG_PC_LINK_SPEED_100MBPS:
- speed = "100Mbps";
- break;
- case IPG_PC_LINK_SPEED_1000MBPS:
- speed = "1000Mbps";
- break;
- default:
- speed = "undefined!";
- return 0;
- }
-
- netdev_info(dev, "Link speed = %s\n", speed);
- if (sp->tenmbpsmode == 1)
- netdev_info(dev, "10Mbps operational mode enabled\n");
-
- if (phyctrl & IPG_PC_DUPLEX_STATUS) {
- fullduplex = 1;
- txflowcontrol = 1;
- rxflowcontrol = 1;
- }
-
- /* Configure full duplex, and flow control. */
- if (fullduplex == 1) {
-
- /* Configure IPG for full duplex operation. */
-
- duplex = "full";
-
- mac_ctrl_val |= IPG_MC_DUPLEX_SELECT_FD;
-
- if (txflowcontrol == 1) {
- tx_desc = "";
- mac_ctrl_val |= IPG_MC_TX_FLOW_CONTROL_ENABLE;
- } else {
- tx_desc = "no ";
- mac_ctrl_val &= ~IPG_MC_TX_FLOW_CONTROL_ENABLE;
- }
-
- if (rxflowcontrol == 1) {
- rx_desc = "";
- mac_ctrl_val |= IPG_MC_RX_FLOW_CONTROL_ENABLE;
- } else {
- rx_desc = "no ";
- mac_ctrl_val &= ~IPG_MC_RX_FLOW_CONTROL_ENABLE;
- }
- } else {
- duplex = "half";
- tx_desc = "no ";
- rx_desc = "no ";
- mac_ctrl_val &= (~IPG_MC_DUPLEX_SELECT_FD &
- ~IPG_MC_TX_FLOW_CONTROL_ENABLE &
- ~IPG_MC_RX_FLOW_CONTROL_ENABLE);
- }
-
- netdev_info(dev, "setting %s duplex, %sTX, %sRX flow control\n",
- duplex, tx_desc, rx_desc);
- ipg_w32(mac_ctrl_val, MAC_CTRL);
-
- return 0;
-}
-
-/* Determine and configure multicast operation and set
- * receive mode for IPG.
- */
-static void ipg_nic_set_multicast_list(struct net_device *dev)
-{
- void __iomem *ioaddr = ipg_ioaddr(dev);
- struct netdev_hw_addr *ha;
- unsigned int hashindex;
- u32 hashtable[2];
- u8 receivemode;
-
- IPG_DEBUG_MSG("_nic_set_multicast_list\n");
-
- receivemode = IPG_RM_RECEIVEUNICAST | IPG_RM_RECEIVEBROADCAST;
-
- if (dev->flags & IFF_PROMISC) {
- /* NIC to be configured in promiscuous mode. */
- receivemode = IPG_RM_RECEIVEALLFRAMES;
- } else if ((dev->flags & IFF_ALLMULTI) ||
- ((dev->flags & IFF_MULTICAST) &&
- (netdev_mc_count(dev) > IPG_MULTICAST_HASHTABLE_SIZE))) {
- /* NIC to be configured to receive all multicast
- * frames. */
- receivemode |= IPG_RM_RECEIVEMULTICAST;
- } else if ((dev->flags & IFF_MULTICAST) && !netdev_mc_empty(dev)) {
- /* NIC to be configured to receive selected
- * multicast addresses. */
- receivemode |= IPG_RM_RECEIVEMULTICASTHASH;
- }
-
- /* Calculate the bits to set for the 64 bit, IPG HASHTABLE.
- * The IPG applies a cyclic-redundancy-check (the same CRC
- * used to calculate the frame data FCS) to the destination
- * address all incoming multicast frames whose destination
- * address has the multicast bit set. The least significant
- * 6 bits of the CRC result are used as an addressing index
- * into the hash table. If the value of the bit addressed by
- * this index is a 1, the frame is passed to the host system.
- */
-
- /* Clear hashtable. */
- hashtable[0] = 0x00000000;
- hashtable[1] = 0x00000000;
-
- /* Cycle through all multicast addresses to filter. */
- netdev_for_each_mc_addr(ha, dev) {
- /* Calculate CRC result for each multicast address. */
- hashindex = crc32_le(0xffffffff, ha->addr,
- ETH_ALEN);
-
- /* Use only the least significant 6 bits. */
- hashindex = hashindex & 0x3F;
-
- /* Within "hashtable", set bit number "hashindex"
- * to a logic 1.
- */
- set_bit(hashindex, (void *)hashtable);
- }
-
- /* Write the value of the hashtable, to the 4, 16 bit
- * HASHTABLE IPG registers.
- */
- ipg_w32(hashtable[0], HASHTABLE_0);
- ipg_w32(hashtable[1], HASHTABLE_1);
-
- ipg_w8(IPG_RM_RSVD_MASK & receivemode, RECEIVE_MODE);
-
- IPG_DEBUG_MSG("ReceiveMode = %x\n", ipg_r8(RECEIVE_MODE));
-}
-
-static int ipg_io_config(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = ipg_ioaddr(dev);
- u32 origmacctrl;
- u32 restoremacctrl;
-
- IPG_DEBUG_MSG("_io_config\n");
-
- origmacctrl = ipg_r32(MAC_CTRL);
-
- restoremacctrl = origmacctrl | IPG_MC_STATISTICS_ENABLE;
-
- /* Based on compilation option, determine if FCS is to be
- * stripped on receive frames by IPG.
- */
- if (!IPG_STRIP_FCS_ON_RX)
- restoremacctrl |= IPG_MC_RCV_FCS;
-
- /* Determine if transmitter and/or receiver are
- * enabled so we may restore MACCTRL correctly.
- */
- if (origmacctrl & IPG_MC_TX_ENABLED)
- restoremacctrl |= IPG_MC_TX_ENABLE;
-
- if (origmacctrl & IPG_MC_RX_ENABLED)
- restoremacctrl |= IPG_MC_RX_ENABLE;
-
- /* Transmitter and receiver must be disabled before setting
- * IFSSelect.
- */
- ipg_w32((origmacctrl & (IPG_MC_RX_DISABLE | IPG_MC_TX_DISABLE)) &
- IPG_MC_RSVD_MASK, MAC_CTRL);
-
- /* Now that transmitter and receiver are disabled, write
- * to IFSSelect.
- */
- ipg_w32((origmacctrl & IPG_MC_IFS_96BIT) & IPG_MC_RSVD_MASK, MAC_CTRL);
-
- /* Set RECEIVEMODE register. */
- ipg_nic_set_multicast_list(dev);
-
- ipg_w16(sp->max_rxframe_size, MAX_FRAME_SIZE);
-
- ipg_w8(IPG_RXDMAPOLLPERIOD_VALUE, RX_DMA_POLL_PERIOD);
- ipg_w8(IPG_RXDMAURGENTTHRESH_VALUE, RX_DMA_URGENT_THRESH);
- ipg_w8(IPG_RXDMABURSTTHRESH_VALUE, RX_DMA_BURST_THRESH);
- ipg_w8(IPG_TXDMAPOLLPERIOD_VALUE, TX_DMA_POLL_PERIOD);
- ipg_w8(IPG_TXDMAURGENTTHRESH_VALUE, TX_DMA_URGENT_THRESH);
- ipg_w8(IPG_TXDMABURSTTHRESH_VALUE, TX_DMA_BURST_THRESH);
- ipg_w16((IPG_IE_HOST_ERROR | IPG_IE_TX_DMA_COMPLETE |
- IPG_IE_TX_COMPLETE | IPG_IE_INT_REQUESTED |
- IPG_IE_UPDATE_STATS | IPG_IE_LINK_EVENT |
- IPG_IE_RX_DMA_COMPLETE | IPG_IE_RX_DMA_PRIORITY), INT_ENABLE);
- ipg_w16(IPG_FLOWONTHRESH_VALUE, FLOW_ON_THRESH);
- ipg_w16(IPG_FLOWOFFTHRESH_VALUE, FLOW_OFF_THRESH);
-
- /* IPG multi-frag frame bug workaround.
- * Per silicon revision B3 eratta.
- */
- ipg_w16(ipg_r16(DEBUG_CTRL) | 0x0200, DEBUG_CTRL);
-
- /* IPG TX poll now bug workaround.
- * Per silicon revision B3 eratta.
- */
- ipg_w16(ipg_r16(DEBUG_CTRL) | 0x0010, DEBUG_CTRL);
-
- /* IPG RX poll now bug workaround.
- * Per silicon revision B3 eratta.
- */
- ipg_w16(ipg_r16(DEBUG_CTRL) | 0x0020, DEBUG_CTRL);
-
- /* Now restore MACCTRL to original setting. */
- ipg_w32(IPG_MC_RSVD_MASK & restoremacctrl, MAC_CTRL);
-
- /* Disable unused RMON statistics. */
- ipg_w32(IPG_RZ_ALL, RMON_STATISTICS_MASK);
-
- /* Disable unused MIB statistics. */
- ipg_w32(IPG_SM_MACCONTROLFRAMESXMTD | IPG_SM_MACCONTROLFRAMESRCVD |
- IPG_SM_BCSTOCTETXMTOK_BCSTFRAMESXMTDOK | IPG_SM_TXJUMBOFRAMES |
- IPG_SM_MCSTOCTETXMTOK_MCSTFRAMESXMTDOK | IPG_SM_RXJUMBOFRAMES |
- IPG_SM_BCSTOCTETRCVDOK_BCSTFRAMESRCVDOK |
- IPG_SM_UDPCHECKSUMERRORS | IPG_SM_TCPCHECKSUMERRORS |
- IPG_SM_IPCHECKSUMERRORS, STATISTICS_MASK);
-
- return 0;
-}
-
-/*
- * Create a receive buffer within system memory and update
- * NIC private structure appropriately.
- */
-static int ipg_get_rxbuff(struct net_device *dev, int entry)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- struct ipg_rx *rxfd = sp->rxd + entry;
- struct sk_buff *skb;
- u64 rxfragsize;
-
- IPG_DEBUG_MSG("_get_rxbuff\n");
-
- skb = netdev_alloc_skb_ip_align(dev, sp->rxsupport_size);
- if (!skb) {
- sp->rx_buff[entry] = NULL;
- return -ENOMEM;
- }
-
- /* Save the address of the sk_buff structure. */
- sp->rx_buff[entry] = skb;
-
- rxfd->frag_info = cpu_to_le64(pci_map_single(sp->pdev, skb->data,
- sp->rx_buf_sz, PCI_DMA_FROMDEVICE));
-
- /* Set the RFD fragment length. */
- rxfragsize = sp->rxfrag_size;
- rxfd->frag_info |= cpu_to_le64((rxfragsize << 48) & IPG_RFI_FRAGLEN);
-
- return 0;
-}
-
-static int init_rfdlist(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = sp->ioaddr;
- unsigned int i;
-
- IPG_DEBUG_MSG("_init_rfdlist\n");
-
- for (i = 0; i < IPG_RFDLIST_LENGTH; i++) {
- struct ipg_rx *rxfd = sp->rxd + i;
-
- if (sp->rx_buff[i]) {
- pci_unmap_single(sp->pdev,
- le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
- sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
- dev_kfree_skb_irq(sp->rx_buff[i]);
- sp->rx_buff[i] = NULL;
- }
-
- /* Clear out the RFS field. */
- rxfd->rfs = 0x0000000000000000;
-
- if (ipg_get_rxbuff(dev, i) < 0) {
- /*
- * A receive buffer was not ready, break the
- * RFD list here.
- */
- IPG_DEBUG_MSG("Cannot allocate Rx buffer\n");
-
- /* Just in case we cannot allocate a single RFD.
- * Should not occur.
- */
- if (i == 0) {
- netdev_err(dev, "No memory available for RFD list\n");
- return -ENOMEM;
- }
- }
-
- rxfd->next_desc = cpu_to_le64(sp->rxd_map +
- sizeof(struct ipg_rx)*(i + 1));
- }
- sp->rxd[i - 1].next_desc = cpu_to_le64(sp->rxd_map);
-
- sp->rx_current = 0;
- sp->rx_dirty = 0;
-
- /* Write the location of the RFDList to the IPG. */
- ipg_w32((u32) sp->rxd_map, RFD_LIST_PTR_0);
- ipg_w32(0x00000000, RFD_LIST_PTR_1);
-
- return 0;
-}
-
-static void init_tfdlist(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = sp->ioaddr;
- unsigned int i;
-
- IPG_DEBUG_MSG("_init_tfdlist\n");
-
- for (i = 0; i < IPG_TFDLIST_LENGTH; i++) {
- struct ipg_tx *txfd = sp->txd + i;
-
- txfd->tfc = cpu_to_le64(IPG_TFC_TFDDONE);
-
- if (sp->tx_buff[i]) {
- dev_kfree_skb_irq(sp->tx_buff[i]);
- sp->tx_buff[i] = NULL;
- }
-
- txfd->next_desc = cpu_to_le64(sp->txd_map +
- sizeof(struct ipg_tx)*(i + 1));
- }
- sp->txd[i - 1].next_desc = cpu_to_le64(sp->txd_map);
-
- sp->tx_current = 0;
- sp->tx_dirty = 0;
-
- /* Write the location of the TFDList to the IPG. */
- IPG_DDEBUG_MSG("Starting TFDListPtr = %08x\n",
- (u32) sp->txd_map);
- ipg_w32((u32) sp->txd_map, TFD_LIST_PTR_0);
- ipg_w32(0x00000000, TFD_LIST_PTR_1);
-
- sp->reset_current_tfd = 1;
-}
-
-/*
- * Free all transmit buffers which have already been transferred
- * via DMA to the IPG.
- */
-static void ipg_nic_txfree(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- unsigned int released, pending, dirty;
-
- IPG_DEBUG_MSG("_nic_txfree\n");
-
- pending = sp->tx_current - sp->tx_dirty;
- dirty = sp->tx_dirty % IPG_TFDLIST_LENGTH;
-
- for (released = 0; released < pending; released++) {
- struct sk_buff *skb = sp->tx_buff[dirty];
- struct ipg_tx *txfd = sp->txd + dirty;
-
- IPG_DEBUG_MSG("TFC = %016lx\n", (unsigned long) txfd->tfc);
-
- /* Look at each TFD's TFC field beginning
- * at the last freed TFD up to the current TFD.
- * If the TFDDone bit is set, free the associated
- * buffer.
- */
- if (!(txfd->tfc & cpu_to_le64(IPG_TFC_TFDDONE)))
- break;
-
- /* Free the transmit buffer. */
- if (skb) {
- pci_unmap_single(sp->pdev,
- le64_to_cpu(txfd->frag_info) & ~IPG_TFI_FRAGLEN,
- skb->len, PCI_DMA_TODEVICE);
-
- dev_kfree_skb_irq(skb);
-
- sp->tx_buff[dirty] = NULL;
- }
- dirty = (dirty + 1) % IPG_TFDLIST_LENGTH;
- }
-
- sp->tx_dirty += released;
-
- if (netif_queue_stopped(dev) &&
- (sp->tx_current != (sp->tx_dirty + IPG_TFDLIST_LENGTH))) {
- netif_wake_queue(dev);
- }
-}
-
-static void ipg_tx_timeout(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = sp->ioaddr;
-
- ipg_reset(dev, IPG_AC_TX_RESET | IPG_AC_DMA | IPG_AC_NETWORK |
- IPG_AC_FIFO);
-
- spin_lock_irq(&sp->lock);
-
- /* Re-configure after DMA reset. */
- if (ipg_io_config(dev) < 0)
- netdev_info(dev, "Error during re-configuration\n");
-
- init_tfdlist(dev);
-
- spin_unlock_irq(&sp->lock);
-
- ipg_w32((ipg_r32(MAC_CTRL) | IPG_MC_TX_ENABLE) & IPG_MC_RSVD_MASK,
- MAC_CTRL);
-}
-
-/*
- * For TxComplete interrupts, free all transmit
- * buffers which have already been transferred via DMA
- * to the IPG.
- */
-static void ipg_nic_txcleanup(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = sp->ioaddr;
- unsigned int i;
-
- IPG_DEBUG_MSG("_nic_txcleanup\n");
-
- for (i = 0; i < IPG_TFDLIST_LENGTH; i++) {
- /* Reading the TXSTATUS register clears the
- * TX_COMPLETE interrupt.
- */
- u32 txstatusdword = ipg_r32(TX_STATUS);
-
- IPG_DEBUG_MSG("TxStatus = %08x\n", txstatusdword);
-
- /* Check for Transmit errors. Error bits only valid if
- * TX_COMPLETE bit in the TXSTATUS register is a 1.
- */
- if (!(txstatusdword & IPG_TS_TX_COMPLETE))
- break;
-
- /* If in 10Mbps mode, indicate transmit is ready. */
- if (sp->tenmbpsmode) {
- netif_wake_queue(dev);
- }
-
- /* Transmit error, increment stat counters. */
- if (txstatusdword & IPG_TS_TX_ERROR) {
- IPG_DEBUG_MSG("Transmit error\n");
- sp->stats.tx_errors++;
- }
-
- /* Late collision, re-enable transmitter. */
- if (txstatusdword & IPG_TS_LATE_COLLISION) {
- IPG_DEBUG_MSG("Late collision on transmit\n");
- ipg_w32((ipg_r32(MAC_CTRL) | IPG_MC_TX_ENABLE) &
- IPG_MC_RSVD_MASK, MAC_CTRL);
- }
-
- /* Maximum collisions, re-enable transmitter. */
- if (txstatusdword & IPG_TS_TX_MAX_COLL) {
- IPG_DEBUG_MSG("Maximum collisions on transmit\n");
- ipg_w32((ipg_r32(MAC_CTRL) | IPG_MC_TX_ENABLE) &
- IPG_MC_RSVD_MASK, MAC_CTRL);
- }
-
- /* Transmit underrun, reset and re-enable
- * transmitter.
- */
- if (txstatusdword & IPG_TS_TX_UNDERRUN) {
- IPG_DEBUG_MSG("Transmitter underrun\n");
- sp->stats.tx_fifo_errors++;
- ipg_reset(dev, IPG_AC_TX_RESET | IPG_AC_DMA |
- IPG_AC_NETWORK | IPG_AC_FIFO);
-
- /* Re-configure after DMA reset. */
- if (ipg_io_config(dev) < 0) {
- netdev_info(dev, "Error during re-configuration\n");
- }
- init_tfdlist(dev);
-
- ipg_w32((ipg_r32(MAC_CTRL) | IPG_MC_TX_ENABLE) &
- IPG_MC_RSVD_MASK, MAC_CTRL);
- }
- }
-
- ipg_nic_txfree(dev);
-}
-
-/* Provides statistical information about the IPG NIC. */
-static struct net_device_stats *ipg_nic_get_stats(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = sp->ioaddr;
- u16 temp1;
- u16 temp2;
-
- IPG_DEBUG_MSG("_nic_get_stats\n");
-
- /* Check to see if the NIC has been initialized via nic_open,
- * before trying to read statistic registers.
- */
- if (!netif_running(dev))
- return &sp->stats;
-
- sp->stats.rx_packets += ipg_r32(IPG_FRAMESRCVDOK);
- sp->stats.tx_packets += ipg_r32(IPG_FRAMESXMTDOK);
- sp->stats.rx_bytes += ipg_r32(IPG_OCTETRCVOK);
- sp->stats.tx_bytes += ipg_r32(IPG_OCTETXMTOK);
- temp1 = ipg_r16(IPG_FRAMESLOSTRXERRORS);
- sp->stats.rx_errors += temp1;
- sp->stats.rx_missed_errors += temp1;
- temp1 = ipg_r32(IPG_SINGLECOLFRAMES) + ipg_r32(IPG_MULTICOLFRAMES) +
- ipg_r32(IPG_LATECOLLISIONS);
- temp2 = ipg_r16(IPG_CARRIERSENSEERRORS);
- sp->stats.collisions += temp1;
- sp->stats.tx_dropped += ipg_r16(IPG_FRAMESABORTXSCOLLS);
- sp->stats.tx_errors += ipg_r16(IPG_FRAMESWEXDEFERRAL) +
- ipg_r32(IPG_FRAMESWDEFERREDXMT) + temp1 + temp2;
- sp->stats.multicast += ipg_r32(IPG_MCSTOCTETRCVDOK);
-
- /* detailed tx_errors */
- sp->stats.tx_carrier_errors += temp2;
-
- /* detailed rx_errors */
- sp->stats.rx_length_errors += ipg_r16(IPG_INRANGELENGTHERRORS) +
- ipg_r16(IPG_FRAMETOOLONGERRORS);
- sp->stats.rx_crc_errors += ipg_r16(IPG_FRAMECHECKSEQERRORS);
-
- /* Unutilized IPG statistic registers. */
- ipg_r32(IPG_MCSTFRAMESRCVDOK);
-
- return &sp->stats;
-}
-
-/* Restore used receive buffers. */
-static int ipg_nic_rxrestore(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- const unsigned int curr = sp->rx_current;
- unsigned int dirty = sp->rx_dirty;
-
- IPG_DEBUG_MSG("_nic_rxrestore\n");
-
- for (dirty = sp->rx_dirty; curr - dirty > 0; dirty++) {
- unsigned int entry = dirty % IPG_RFDLIST_LENGTH;
-
- /* rx_copybreak may poke hole here and there. */
- if (sp->rx_buff[entry])
- continue;
-
- /* Generate a new receive buffer to replace the
- * current buffer (which will be released by the
- * Linux system).
- */
- if (ipg_get_rxbuff(dev, entry) < 0) {
- IPG_DEBUG_MSG("Cannot allocate new Rx buffer\n");
-
- break;
- }
-
- /* Reset the RFS field. */
- sp->rxd[entry].rfs = 0x0000000000000000;
- }
- sp->rx_dirty = dirty;
-
- return 0;
-}
-
-/* use jumboindex and jumbosize to control jumbo frame status
- * initial status is jumboindex=-1 and jumbosize=0
- * 1. jumboindex = -1 and jumbosize=0 : previous jumbo frame has been done.
- * 2. jumboindex != -1 and jumbosize != 0 : jumbo frame is not over size and receiving
- * 3. jumboindex = -1 and jumbosize != 0 : jumbo frame is over size, already dump
- * previous receiving and need to continue dumping the current one
- */
-enum {
- NORMAL_PACKET,
- ERROR_PACKET
-};
-
-enum {
- FRAME_NO_START_NO_END = 0,
- FRAME_WITH_START = 1,
- FRAME_WITH_END = 10,
- FRAME_WITH_START_WITH_END = 11
-};
-
-static void ipg_nic_rx_free_skb(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- unsigned int entry = sp->rx_current % IPG_RFDLIST_LENGTH;
-
- if (sp->rx_buff[entry]) {
- struct ipg_rx *rxfd = sp->rxd + entry;
-
- pci_unmap_single(sp->pdev,
- le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
- sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
- dev_kfree_skb_irq(sp->rx_buff[entry]);
- sp->rx_buff[entry] = NULL;
- }
-}
-
-static int ipg_nic_rx_check_frame_type(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- struct ipg_rx *rxfd = sp->rxd + (sp->rx_current % IPG_RFDLIST_LENGTH);
- int type = FRAME_NO_START_NO_END;
-
- if (le64_to_cpu(rxfd->rfs) & IPG_RFS_FRAMESTART)
- type += FRAME_WITH_START;
- if (le64_to_cpu(rxfd->rfs) & IPG_RFS_FRAMEEND)
- type += FRAME_WITH_END;
- return type;
-}
-
-static int ipg_nic_rx_check_error(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- unsigned int entry = sp->rx_current % IPG_RFDLIST_LENGTH;
- struct ipg_rx *rxfd = sp->rxd + entry;
-
- if (IPG_DROP_ON_RX_ETH_ERRORS && (le64_to_cpu(rxfd->rfs) &
- (IPG_RFS_RXFIFOOVERRUN | IPG_RFS_RXRUNTFRAME |
- IPG_RFS_RXALIGNMENTERROR | IPG_RFS_RXFCSERROR |
- IPG_RFS_RXOVERSIZEDFRAME | IPG_RFS_RXLENGTHERROR))) {
- IPG_DEBUG_MSG("Rx error, RFS = %016lx\n",
- (unsigned long) rxfd->rfs);
-
- /* Increment general receive error statistic. */
- sp->stats.rx_errors++;
-
- /* Increment detailed receive error statistics. */
- if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFIFOOVERRUN) {
- IPG_DEBUG_MSG("RX FIFO overrun occurred\n");
-
- sp->stats.rx_fifo_errors++;
- }
-
- if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXRUNTFRAME) {
- IPG_DEBUG_MSG("RX runt occurred\n");
- sp->stats.rx_length_errors++;
- }
-
- /* Do nothing for IPG_RFS_RXOVERSIZEDFRAME,
- * error count handled by an IPG statistic register.
- */
-
- if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXALIGNMENTERROR) {
- IPG_DEBUG_MSG("RX alignment error occurred\n");
- sp->stats.rx_frame_errors++;
- }
-
- /* Do nothing for IPG_RFS_RXFCSERROR, error count
- * handled by an IPG statistic register.
- */
-
- /* Free the memory associated with the RX
- * buffer since it is erroneous and we will
- * not pass it to higher layer processes.
- */
- if (sp->rx_buff[entry]) {
- pci_unmap_single(sp->pdev,
- le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
- sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-
- dev_kfree_skb_irq(sp->rx_buff[entry]);
- sp->rx_buff[entry] = NULL;
- }
- return ERROR_PACKET;
- }
- return NORMAL_PACKET;
-}
-
-static void ipg_nic_rx_with_start_and_end(struct net_device *dev,
- struct ipg_nic_private *sp,
- struct ipg_rx *rxfd, unsigned entry)
-{
- struct ipg_jumbo *jumbo = &sp->jumbo;
- struct sk_buff *skb;
- int framelen;
-
- if (jumbo->found_start) {
- dev_kfree_skb_irq(jumbo->skb);
- jumbo->found_start = 0;
- jumbo->current_size = 0;
- jumbo->skb = NULL;
- }
-
- /* 1: found error, 0 no error */
- if (ipg_nic_rx_check_error(dev) != NORMAL_PACKET)
- return;
-
- skb = sp->rx_buff[entry];
- if (!skb)
- return;
-
- /* accept this frame and send to upper layer */
- framelen = le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFRAMELEN;
- if (framelen > sp->rxfrag_size)
- framelen = sp->rxfrag_size;
-
- skb_put(skb, framelen);
- skb->protocol = eth_type_trans(skb, dev);
- skb_checksum_none_assert(skb);
- netif_rx(skb);
- sp->rx_buff[entry] = NULL;
-}
-
-static void ipg_nic_rx_with_start(struct net_device *dev,
- struct ipg_nic_private *sp,
- struct ipg_rx *rxfd, unsigned entry)
-{
- struct ipg_jumbo *jumbo = &sp->jumbo;
- struct pci_dev *pdev = sp->pdev;
- struct sk_buff *skb;
-
- /* 1: found error, 0 no error */
- if (ipg_nic_rx_check_error(dev) != NORMAL_PACKET)
- return;
-
- /* accept this frame and send to upper layer */
- skb = sp->rx_buff[entry];
- if (!skb)
- return;
-
- if (jumbo->found_start)
- dev_kfree_skb_irq(jumbo->skb);
-
- pci_unmap_single(pdev, le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
- sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-
- skb_put(skb, sp->rxfrag_size);
-
- jumbo->found_start = 1;
- jumbo->current_size = sp->rxfrag_size;
- jumbo->skb = skb;
-
- sp->rx_buff[entry] = NULL;
-}
-
-static void ipg_nic_rx_with_end(struct net_device *dev,
- struct ipg_nic_private *sp,
- struct ipg_rx *rxfd, unsigned entry)
-{
- struct ipg_jumbo *jumbo = &sp->jumbo;
-
- /* 1: found error, 0 no error */
- if (ipg_nic_rx_check_error(dev) == NORMAL_PACKET) {
- struct sk_buff *skb = sp->rx_buff[entry];
-
- if (!skb)
- return;
-
- if (jumbo->found_start) {
- int framelen, endframelen;
-
- framelen = le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFRAMELEN;
-
- endframelen = framelen - jumbo->current_size;
- if (framelen > sp->rxsupport_size)
- dev_kfree_skb_irq(jumbo->skb);
- else {
- memcpy(skb_put(jumbo->skb, endframelen),
- skb->data, endframelen);
-
- jumbo->skb->protocol =
- eth_type_trans(jumbo->skb, dev);
-
- skb_checksum_none_assert(jumbo->skb);
- netif_rx(jumbo->skb);
- }
- }
-
- jumbo->found_start = 0;
- jumbo->current_size = 0;
- jumbo->skb = NULL;
-
- ipg_nic_rx_free_skb(dev);
- } else {
- dev_kfree_skb_irq(jumbo->skb);
- jumbo->found_start = 0;
- jumbo->current_size = 0;
- jumbo->skb = NULL;
- }
-}
-
-static void ipg_nic_rx_no_start_no_end(struct net_device *dev,
- struct ipg_nic_private *sp,
- struct ipg_rx *rxfd, unsigned entry)
-{
- struct ipg_jumbo *jumbo = &sp->jumbo;
-
- /* 1: found error, 0 no error */
- if (ipg_nic_rx_check_error(dev) == NORMAL_PACKET) {
- struct sk_buff *skb = sp->rx_buff[entry];
-
- if (skb) {
- if (jumbo->found_start) {
- jumbo->current_size += sp->rxfrag_size;
- if (jumbo->current_size <= sp->rxsupport_size) {
- memcpy(skb_put(jumbo->skb,
- sp->rxfrag_size),
- skb->data, sp->rxfrag_size);
- }
- }
- ipg_nic_rx_free_skb(dev);
- }
- } else {
- dev_kfree_skb_irq(jumbo->skb);
- jumbo->found_start = 0;
- jumbo->current_size = 0;
- jumbo->skb = NULL;
- }
-}
-
-static int ipg_nic_rx_jumbo(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- unsigned int curr = sp->rx_current;
- void __iomem *ioaddr = sp->ioaddr;
- unsigned int i;
-
- IPG_DEBUG_MSG("_nic_rx\n");
-
- for (i = 0; i < IPG_MAXRFDPROCESS_COUNT; i++, curr++) {
- unsigned int entry = curr % IPG_RFDLIST_LENGTH;
- struct ipg_rx *rxfd = sp->rxd + entry;
-
- if (!(rxfd->rfs & cpu_to_le64(IPG_RFS_RFDDONE)))
- break;
-
- switch (ipg_nic_rx_check_frame_type(dev)) {
- case FRAME_WITH_START_WITH_END:
- ipg_nic_rx_with_start_and_end(dev, sp, rxfd, entry);
- break;
- case FRAME_WITH_START:
- ipg_nic_rx_with_start(dev, sp, rxfd, entry);
- break;
- case FRAME_WITH_END:
- ipg_nic_rx_with_end(dev, sp, rxfd, entry);
- break;
- case FRAME_NO_START_NO_END:
- ipg_nic_rx_no_start_no_end(dev, sp, rxfd, entry);
- break;
- }
- }
-
- sp->rx_current = curr;
-
- if (i == IPG_MAXRFDPROCESS_COUNT) {
- /* There are more RFDs to process, however the
- * allocated amount of RFD processing time has
- * expired. Assert Interrupt Requested to make
- * sure we come back to process the remaining RFDs.
- */
- ipg_w32(ipg_r32(ASIC_CTRL) | IPG_AC_INT_REQUEST, ASIC_CTRL);
- }
-
- ipg_nic_rxrestore(dev);
-
- return 0;
-}
-
-static int ipg_nic_rx(struct net_device *dev)
-{
- /* Transfer received Ethernet frames to higher network layers. */
- struct ipg_nic_private *sp = netdev_priv(dev);
- unsigned int curr = sp->rx_current;
- void __iomem *ioaddr = sp->ioaddr;
- struct ipg_rx *rxfd;
- unsigned int i;
-
- IPG_DEBUG_MSG("_nic_rx\n");
-
-#define __RFS_MASK \
- cpu_to_le64(IPG_RFS_RFDDONE | IPG_RFS_FRAMESTART | IPG_RFS_FRAMEEND)
-
- for (i = 0; i < IPG_MAXRFDPROCESS_COUNT; i++, curr++) {
- unsigned int entry = curr % IPG_RFDLIST_LENGTH;
- struct sk_buff *skb = sp->rx_buff[entry];
- unsigned int framelen;
-
- rxfd = sp->rxd + entry;
-
- if (((rxfd->rfs & __RFS_MASK) != __RFS_MASK) || !skb)
- break;
-
- /* Get received frame length. */
- framelen = le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFRAMELEN;
-
- /* Check for jumbo frame arrival with too small
- * RXFRAG_SIZE.
- */
- if (framelen > sp->rxfrag_size) {
- IPG_DEBUG_MSG
- ("RFS FrameLen > allocated fragment size\n");
-
- framelen = sp->rxfrag_size;
- }
-
- if ((IPG_DROP_ON_RX_ETH_ERRORS && (le64_to_cpu(rxfd->rfs) &
- (IPG_RFS_RXFIFOOVERRUN | IPG_RFS_RXRUNTFRAME |
- IPG_RFS_RXALIGNMENTERROR | IPG_RFS_RXFCSERROR |
- IPG_RFS_RXOVERSIZEDFRAME | IPG_RFS_RXLENGTHERROR)))) {
-
- IPG_DEBUG_MSG("Rx error, RFS = %016lx\n",
- (unsigned long int) rxfd->rfs);
-
- /* Increment general receive error statistic. */
- sp->stats.rx_errors++;
-
- /* Increment detailed receive error statistics. */
- if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFIFOOVERRUN) {
- IPG_DEBUG_MSG("RX FIFO overrun occurred\n");
- sp->stats.rx_fifo_errors++;
- }
-
- if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXRUNTFRAME) {
- IPG_DEBUG_MSG("RX runt occurred\n");
- sp->stats.rx_length_errors++;
- }
-
- if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXOVERSIZEDFRAME) ;
- /* Do nothing, error count handled by an IPG
- * statistic register.
- */
-
- if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXALIGNMENTERROR) {
- IPG_DEBUG_MSG("RX alignment error occurred\n");
- sp->stats.rx_frame_errors++;
- }
-
- if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFCSERROR) ;
- /* Do nothing, error count handled by an IPG
- * statistic register.
- */
-
- /* Free the memory associated with the RX
- * buffer since it is erroneous and we will
- * not pass it to higher layer processes.
- */
- if (skb) {
- __le64 info = rxfd->frag_info;
-
- pci_unmap_single(sp->pdev,
- le64_to_cpu(info) & ~IPG_RFI_FRAGLEN,
- sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-
- dev_kfree_skb_irq(skb);
- }
- } else {
-
- /* Adjust the new buffer length to accommodate the size
- * of the received frame.
- */
- skb_put(skb, framelen);
-
- /* Set the buffer's protocol field to Ethernet. */
- skb->protocol = eth_type_trans(skb, dev);
-
- /* The IPG encountered an error with (or
- * there were no) IP/TCP/UDP checksums.
- * This may or may not indicate an invalid
- * IP/TCP/UDP frame was received. Let the
- * upper layer decide.
- */
- skb_checksum_none_assert(skb);
-
- /* Hand off frame for higher layer processing.
- * The function netif_rx() releases the sk_buff
- * when processing completes.
- */
- netif_rx(skb);
- }
-
- /* Assure RX buffer is not reused by IPG. */
- sp->rx_buff[entry] = NULL;
- }
-
- /*
- * If there are more RFDs to process and the allocated amount of RFD
- * processing time has expired, assert Interrupt Requested to make
- * sure we come back to process the remaining RFDs.
- */
- if (i == IPG_MAXRFDPROCESS_COUNT)
- ipg_w32(ipg_r32(ASIC_CTRL) | IPG_AC_INT_REQUEST, ASIC_CTRL);
-
-#ifdef IPG_DEBUG
- /* Check if the RFD list contained no receive frame data. */
- if (!i)
- sp->EmptyRFDListCount++;
-#endif
- while ((le64_to_cpu(rxfd->rfs) & IPG_RFS_RFDDONE) &&
- !((le64_to_cpu(rxfd->rfs) & IPG_RFS_FRAMESTART) &&
- (le64_to_cpu(rxfd->rfs) & IPG_RFS_FRAMEEND))) {
- unsigned int entry = curr++ % IPG_RFDLIST_LENGTH;
-
- rxfd = sp->rxd + entry;
-
- IPG_DEBUG_MSG("Frame requires multiple RFDs\n");
-
- /* An unexpected event, additional code needed to handle
- * properly. So for the time being, just disregard the
- * frame.
- */
-
- /* Free the memory associated with the RX
- * buffer since it is erroneous and we will
- * not pass it to higher layer processes.
- */
- if (sp->rx_buff[entry]) {
- pci_unmap_single(sp->pdev,
- le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
- sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
- dev_kfree_skb_irq(sp->rx_buff[entry]);
- }
-
- /* Assure RX buffer is not reused by IPG. */
- sp->rx_buff[entry] = NULL;
- }
-
- sp->rx_current = curr;
-
- /* Check to see if there are a minimum number of used
- * RFDs before restoring any (should improve performance.)
- */
- if ((curr - sp->rx_dirty) >= IPG_MINUSEDRFDSTOFREE)
- ipg_nic_rxrestore(dev);
-
- return 0;
-}
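
The fast path above hinges on one compare: __RFS_MASK ORs together RFDDone, FrameStart and FrameEnd, and (rfs & mask) == mask holds only when all three bits are set, i.e. when a frame both starts and ends in a single RFD and is complete. A short sketch of the all-bits-set idiom, under the masks defined in ipg.h:

    /* True only when every bit in 'mask' is set in 'value'; a plain
     * (value & mask) test would accept any one of the bits.
     */
    static inline int all_bits_set(u64 value, u64 mask)
    {
    	return (value & mask) == mask;
    }

    /* e.g. all_bits_set(rfs, IPG_RFS_RFDDONE | IPG_RFS_FRAMESTART |
     * IPG_RFS_FRAMEEND) is the single-RFD fast-path test; anything
     * else falls through to the multi-RFD cleanup loop above.
     */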
-
-static void ipg_reset_after_host_error(struct work_struct *work)
-{
- struct ipg_nic_private *sp =
- container_of(work, struct ipg_nic_private, task.work);
- struct net_device *dev = sp->dev;
-
- /*
- * Acknowledge HostError interrupt by resetting
- * IPG DMA and HOST.
- */
- ipg_reset(dev, IPG_AC_GLOBAL_RESET | IPG_AC_HOST | IPG_AC_DMA);
-
- init_rfdlist(dev);
- init_tfdlist(dev);
-
- if (ipg_io_config(dev) < 0) {
- netdev_info(dev, "Cannot recover from PCI error\n");
- schedule_delayed_work(&sp->task, HZ);
- }
-}
-
-static irqreturn_t ipg_interrupt_handler(int irq, void *dev_inst)
-{
- struct net_device *dev = dev_inst;
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = sp->ioaddr;
- unsigned int handled = 0;
- u16 status;
-
- IPG_DEBUG_MSG("_interrupt_handler\n");
-
- if (sp->is_jumbo)
- ipg_nic_rxrestore(dev);
-
- spin_lock(&sp->lock);
-
- /* Get interrupt source information, and acknowledge
- * some (i.e. TxDMAComplete, RxDMAComplete, RxEarly,
- * IntRequested, MacControlFrame, LinkEvent) interrupts
- * if issued. Also, all IPG interrupts are disabled by
- * reading IntStatusAck.
- */
- status = ipg_r16(INT_STATUS_ACK);
-
- IPG_DEBUG_MSG("IntStatusAck = %04x\n", status);
-
- /* Shared IRQ from another device, or a remove event. */
- if (!(status & IPG_IS_RSVD_MASK))
- goto out_enable;
-
- handled = 1;
-
- if (unlikely(!netif_running(dev)))
- goto out_unlock;
-
- /* If RFDListEnd interrupt, restore all used RFDs. */
- if (status & IPG_IS_RFD_LIST_END) {
- IPG_DEBUG_MSG("RFDListEnd Interrupt\n");
-
- /* The RFD list end indicates an RFD was encountered
- * with a 0 NextPtr, or with an RFDDone bit set to 1
- * (indicating the RFD is not ready for use by the
- * IPG.) Try to restore all RFDs.
- */
- ipg_nic_rxrestore(dev);
-
-#ifdef IPG_DEBUG
- /* Increment the RFDlistendCount counter. */
- sp->RFDlistendCount++;
-#endif
- }
-
- /* If RFDListEnd, RxDMAPriority, RxDMAComplete, or
- * IntRequested interrupt, process received frames. */
- if ((status & IPG_IS_RX_DMA_PRIORITY) ||
- (status & IPG_IS_RFD_LIST_END) ||
- (status & IPG_IS_RX_DMA_COMPLETE) ||
- (status & IPG_IS_INT_REQUESTED)) {
-#ifdef IPG_DEBUG
- /* Increment the RFD list checked counter if interrupted
- * only to check the RFD list. */
- if (status & (~(IPG_IS_RX_DMA_PRIORITY | IPG_IS_RFD_LIST_END |
- IPG_IS_RX_DMA_COMPLETE | IPG_IS_INT_REQUESTED) &
- (IPG_IS_HOST_ERROR | IPG_IS_TX_DMA_COMPLETE |
- IPG_IS_LINK_EVENT | IPG_IS_TX_COMPLETE |
- IPG_IS_UPDATE_STATS)))
- sp->RFDListCheckedCount++;
-#endif
-
- if (sp->is_jumbo)
- ipg_nic_rx_jumbo(dev);
- else
- ipg_nic_rx(dev);
- }
-
- /* If TxDMAComplete interrupt, free used TFDs. */
- if (status & IPG_IS_TX_DMA_COMPLETE)
- ipg_nic_txfree(dev);
-
- /* TxComplete interrupts indicate one of numerous actions.
- * Determine what action to take based on TXSTATUS register.
- */
- if (status & IPG_IS_TX_COMPLETE)
- ipg_nic_txcleanup(dev);
-
- /* If UpdateStats interrupt, update Linux Ethernet statistics */
- if (status & IPG_IS_UPDATE_STATS)
- ipg_nic_get_stats(dev);
-
- /* If HostError interrupt, reset IPG. */
- if (status & IPG_IS_HOST_ERROR) {
- IPG_DDEBUG_MSG("HostError Interrupt\n");
-
- schedule_delayed_work(&sp->task, 0);
- }
-
- /* If LinkEvent interrupt, resolve autonegotiation. */
- if (status & IPG_IS_LINK_EVENT) {
- if (ipg_config_autoneg(dev) < 0)
- netdev_info(dev, "Auto-negotiation error\n");
- }
-
- /* If MACCtrlFrame interrupt, do nothing. */
- if (status & IPG_IS_MAC_CTRL_FRAME)
- IPG_DEBUG_MSG("MACCtrlFrame interrupt\n");
-
- /* If RxComplete interrupt, do nothing. */
- if (status & IPG_IS_RX_COMPLETE)
- IPG_DEBUG_MSG("RxComplete interrupt\n");
-
- /* If RxEarly interrupt, do nothing. */
- if (status & IPG_IS_RX_EARLY)
- IPG_DEBUG_MSG("RxEarly interrupt\n");
-
-out_enable:
- /* Re-enable IPG interrupts. */
- ipg_w16(IPG_IE_TX_DMA_COMPLETE | IPG_IE_RX_DMA_COMPLETE |
- IPG_IE_HOST_ERROR | IPG_IE_INT_REQUESTED | IPG_IE_TX_COMPLETE |
- IPG_IE_LINK_EVENT | IPG_IE_UPDATE_STATS, INT_ENABLE);
-out_unlock:
- spin_unlock(&sp->lock);
-
- return IRQ_RETVAL(handled);
-}
-
-static void ipg_rx_clear(struct ipg_nic_private *sp)
-{
- unsigned int i;
-
- for (i = 0; i < IPG_RFDLIST_LENGTH; i++) {
- if (sp->rx_buff[i]) {
- struct ipg_rx *rxfd = sp->rxd + i;
-
- dev_kfree_skb_irq(sp->rx_buff[i]);
- sp->rx_buff[i] = NULL;
- pci_unmap_single(sp->pdev,
- le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
- sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
- }
- }
-}
-
-static void ipg_tx_clear(struct ipg_nic_private *sp)
-{
- unsigned int i;
-
- for (i = 0; i < IPG_TFDLIST_LENGTH; i++) {
- if (sp->tx_buff[i]) {
- struct ipg_tx *txfd = sp->txd + i;
-
- pci_unmap_single(sp->pdev,
- le64_to_cpu(txfd->frag_info) & ~IPG_TFI_FRAGLEN,
- sp->tx_buff[i]->len, PCI_DMA_TODEVICE);
-
- dev_kfree_skb_irq(sp->tx_buff[i]);
-
- sp->tx_buff[i] = NULL;
- }
- }
-}
-
-static int ipg_nic_open(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = sp->ioaddr;
- struct pci_dev *pdev = sp->pdev;
- int rc;
-
- IPG_DEBUG_MSG("_nic_open\n");
-
- sp->rx_buf_sz = sp->rxsupport_size;
-
- /* Check for interrupt line conflicts, and request interrupt
- * line for IPG.
- *
- * IMPORTANT: Disable IPG interrupts prior to registering
- * IRQ.
- */
- ipg_w16(0x0000, INT_ENABLE);
-
- /* Register the interrupt line to be used by the IPG within
- * the Linux system.
- */
- rc = request_irq(pdev->irq, ipg_interrupt_handler, IRQF_SHARED,
- dev->name, dev);
- if (rc < 0) {
- netdev_info(dev, "Error when requesting interrupt\n");
- goto out;
- }
-
- dev->irq = pdev->irq;
-
- rc = -ENOMEM;
-
- sp->rxd = dma_alloc_coherent(&pdev->dev, IPG_RX_RING_BYTES,
- &sp->rxd_map, GFP_KERNEL);
- if (!sp->rxd)
- goto err_free_irq_0;
-
- sp->txd = dma_alloc_coherent(&pdev->dev, IPG_TX_RING_BYTES,
- &sp->txd_map, GFP_KERNEL);
- if (!sp->txd)
- goto err_free_rx_1;
-
- rc = init_rfdlist(dev);
- if (rc < 0) {
- netdev_info(dev, "Error during configuration\n");
- goto err_free_tx_2;
- }
-
- init_tfdlist(dev);
-
- rc = ipg_io_config(dev);
- if (rc < 0) {
- netdev_info(dev, "Error during configuration\n");
- goto err_release_tfdlist_3;
- }
-
- /* Resolve autonegotiation. */
- if (ipg_config_autoneg(dev) < 0)
- netdev_info(dev, "Auto-negotiation error\n");
-
- /* initialize JUMBO Frame control variable */
- sp->jumbo.found_start = 0;
- sp->jumbo.current_size = 0;
- sp->jumbo.skb = NULL;
-
- /* Enable transmit and receive operation of the IPG. */
- ipg_w32((ipg_r32(MAC_CTRL) | IPG_MC_RX_ENABLE | IPG_MC_TX_ENABLE) &
- IPG_MC_RSVD_MASK, MAC_CTRL);
-
- netif_start_queue(dev);
-out:
- return rc;
-
-err_release_tfdlist_3:
- ipg_tx_clear(sp);
- ipg_rx_clear(sp);
-err_free_tx_2:
- dma_free_coherent(&pdev->dev, IPG_TX_RING_BYTES, sp->txd, sp->txd_map);
-err_free_rx_1:
- dma_free_coherent(&pdev->dev, IPG_RX_RING_BYTES, sp->rxd, sp->rxd_map);
-err_free_irq_0:
- free_irq(pdev->irq, dev);
- goto out;
-}
-
-static int ipg_nic_stop(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = sp->ioaddr;
- struct pci_dev *pdev = sp->pdev;
-
- IPG_DEBUG_MSG("_nic_stop\n");
-
- netif_stop_queue(dev);
-
- IPG_DUMPTFDLIST(dev);
-
- do {
- (void) ipg_r16(INT_STATUS_ACK);
-
- ipg_reset(dev, IPG_AC_GLOBAL_RESET | IPG_AC_HOST | IPG_AC_DMA);
-
- synchronize_irq(pdev->irq);
- } while (ipg_r16(INT_ENABLE) & IPG_IE_RSVD_MASK);
-
- ipg_rx_clear(sp);
-
- ipg_tx_clear(sp);
-
- pci_free_consistent(pdev, IPG_RX_RING_BYTES, sp->rxd, sp->rxd_map);
- pci_free_consistent(pdev, IPG_TX_RING_BYTES, sp->txd, sp->txd_map);
-
- free_irq(pdev->irq, dev);
-
- return 0;
-}
-
-static netdev_tx_t ipg_nic_hard_start_xmit(struct sk_buff *skb,
- struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = sp->ioaddr;
- unsigned int entry = sp->tx_current % IPG_TFDLIST_LENGTH;
- unsigned long flags;
- struct ipg_tx *txfd;
-
- IPG_DDEBUG_MSG("_nic_hard_start_xmit\n");
-
- /* If in 10Mbps mode, stop the transmit queue so
- * no more transmit frames are accepted.
- */
- if (sp->tenmbpsmode)
- netif_stop_queue(dev);
-
- if (sp->reset_current_tfd) {
- sp->reset_current_tfd = 0;
- entry = 0;
- }
-
- txfd = sp->txd + entry;
-
- sp->tx_buff[entry] = skb;
-
- /* Clear all TFC fields, except TFDDONE. */
- txfd->tfc = cpu_to_le64(IPG_TFC_TFDDONE);
-
- /* Specify the TFC field within the TFD. */
- txfd->tfc |= cpu_to_le64(IPG_TFC_WORDALIGNDISABLED |
- (IPG_TFC_FRAMEID & sp->tx_current) |
- (IPG_TFC_FRAGCOUNT & (1 << 24)));
- /*
- * 16--17 (WordAlign) <- 3 (disable),
- * 0--15 (FrameId) <- sp->tx_current,
- * 24--27 (FragCount) <- 1
- */
-
- /* Request TxComplete interrupts at an interval defined
- * by the constant IPG_FRAMESBETWEENTXCOMPLETES.
- * Request TxComplete interrupt for every frame
- * if in 10Mbps mode to accommodate problem with 10Mbps
- * processing.
- */
- if (sp->tenmbpsmode)
- txfd->tfc |= cpu_to_le64(IPG_TFC_TXINDICATE);
- txfd->tfc |= cpu_to_le64(IPG_TFC_TXDMAINDICATE);
- /* Based on compilation option, determine if FCS is to be
- * appended to transmit frame by IPG.
- */
- if (!(IPG_APPEND_FCS_ON_TX))
- txfd->tfc |= cpu_to_le64(IPG_TFC_FCSAPPENDDISABLE);
-
- /* Based on compilation option, determine if IP, TCP and/or
- * UDP checksums are to be added to transmit frame by IPG.
- */
- if (IPG_ADD_IPCHECKSUM_ON_TX)
- txfd->tfc |= cpu_to_le64(IPG_TFC_IPCHECKSUMENABLE);
-
- if (IPG_ADD_TCPCHECKSUM_ON_TX)
- txfd->tfc |= cpu_to_le64(IPG_TFC_TCPCHECKSUMENABLE);
-
- if (IPG_ADD_UDPCHECKSUM_ON_TX)
- txfd->tfc |= cpu_to_le64(IPG_TFC_UDPCHECKSUMENABLE);
-
- /* Based on compilation option, determine if VLAN tag info is to be
- * inserted into transmit frame by IPG.
- */
- if (IPG_INSERT_MANUAL_VLAN_TAG) {
- txfd->tfc |= cpu_to_le64(IPG_TFC_VLANTAGINSERT |
- ((u64) IPG_MANUAL_VLAN_VID << 32) |
- ((u64) IPG_MANUAL_VLAN_CFI << 44) |
- ((u64) IPG_MANUAL_VLAN_USERPRIORITY << 45));
- }
-
- /* The fragment start location within system memory is defined
- * by the sk_buff structure's data field. The physical address
- * of this location within the system's virtual memory space
- * is determined using the IPG_HOST2BUS_MAP function.
- */
- txfd->frag_info = cpu_to_le64(pci_map_single(sp->pdev, skb->data,
- skb->len, PCI_DMA_TODEVICE));
-
- /* The length of the fragment within system memory is defined by
- * the sk_buff structure's len field.
- */
- txfd->frag_info |= cpu_to_le64(IPG_TFI_FRAGLEN &
- ((u64) (skb->len & 0xffff) << 48));
-
- /* Clear the TFDDone bit last to indicate the TFD is ready
- * for transfer to the IPG.
- */
- txfd->tfc &= cpu_to_le64(~IPG_TFC_TFDDONE);
-
- spin_lock_irqsave(&sp->lock, flags);
-
- sp->tx_current++;
-
- mmiowb();
-
- ipg_w32(IPG_DC_TX_DMA_POLL_NOW, DMA_CTRL);
-
- if (sp->tx_current == (sp->tx_dirty + IPG_TFDLIST_LENGTH))
- netif_stop_queue(dev);
-
- spin_unlock_irqrestore(&sp->lock, flags);
-
- return NETDEV_TX_OK;
-}
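
The TFC word assembled in the transmit path is a packed 64-bit descriptor: FrameId in bits 0-15, WordAlign in bits 16-17, FragCount in bits 24-27, plus per-frame option bits. A hedged sketch of that packing, using the IPG_TFC_* masks from ipg.h:

    /* Build the base transmit-frame-control word the way
     * ipg_nic_hard_start_xmit() does for a single-fragment frame.
     */
    static u64 pack_tfc(u16 frame_id, unsigned int frag_count)
    {
    	u64 tfc = IPG_TFC_TFDDONE;		/* cleared again last */

    	tfc |= IPG_TFC_WORDALIGNDISABLED;	/* bits 16..17 */
    	tfc |= IPG_TFC_FRAMEID & frame_id;	/* bits 0..15 */
    	tfc |= IPG_TFC_FRAGCOUNT & ((u64)frag_count << 24); /* bits 24..27 */
    	return tfc;	/* caller stores it with cpu_to_le64() */
    }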
-
-static void ipg_set_phy_default_param(unsigned char rev,
- struct net_device *dev, int phy_address)
-{
- unsigned short length;
- unsigned char revision;
- const unsigned short *phy_param;
- unsigned short address, value;
-
- phy_param = &DefaultPhyParam[0];
- length = *phy_param & 0x00FF;
- revision = (unsigned char)((*phy_param) >> 8);
- phy_param++;
- while (length != 0) {
- if (rev == revision) {
- while (length > 1) {
- address = *phy_param;
- value = *(phy_param + 1);
- phy_param += 2;
- mdio_write(dev, phy_address, address, value);
- length -= 4;
- }
- break;
- } else {
- phy_param += length / 2;
- length = *phy_param & 0x00FF;
- revision = (unsigned char)((*phy_param) >> 8);
- phy_param++;
- }
- }
-}
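
ipg_set_phy_default_param() walks a table of variable-length records: each record opens with a header word whose low byte is the byte length of the (address, value) pairs that follow and whose high byte is the silicon revision they apply to; a zero-length header terminates the table. A hypothetical record, purely to illustrate the layout the walker expects (register numbers and values are made up):

    static const unsigned short example_phy_param[] = {
    	(0x40 << 8) | 8,	/* rev 0x40, 8 bytes = two (addr, value) pairs */
    	0x001f, 0x0001,		/* write 0x0001 to PHY register 0x1f */
    	0x0019, 0x1003,		/* write 0x1003 to PHY register 0x19 */
    	0x0000,			/* length 0: end of table */
    };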
-
-static int read_eeprom(struct net_device *dev, int eep_addr)
-{
- void __iomem *ioaddr = ipg_ioaddr(dev);
- unsigned int i;
- int ret = 0;
- u16 value;
-
- value = IPG_EC_EEPROM_READOPCODE | (eep_addr & 0xff);
- ipg_w16(value, EEPROM_CTRL);
-
- for (i = 0; i < 1000; i++) {
- u16 data;
-
- mdelay(10);
- data = ipg_r16(EEPROM_CTRL);
- if (!(data & IPG_EC_EEPROM_BUSY)) {
- ret = ipg_r16(EEPROM_DATA);
- break;
- }
- }
- return ret;
-}
-
-static void ipg_init_mii(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- struct mii_if_info *mii_if = &sp->mii_if;
- int phyaddr;
-
- mii_if->dev = dev;
- mii_if->mdio_read = mdio_read;
- mii_if->mdio_write = mdio_write;
- mii_if->phy_id_mask = 0x1f;
- mii_if->reg_num_mask = 0x1f;
-
- mii_if->phy_id = phyaddr = ipg_find_phyaddr(dev);
-
- if (phyaddr != 0x1f) {
- u16 mii_phyctrl, mii_1000cr;
-
- mii_1000cr = mdio_read(dev, phyaddr, MII_CTRL1000);
- mii_1000cr |= ADVERTISE_1000FULL | ADVERTISE_1000HALF |
- GMII_PHY_1000BASETCONTROL_PreferMaster;
- mdio_write(dev, phyaddr, MII_CTRL1000, mii_1000cr);
-
- mii_phyctrl = mdio_read(dev, phyaddr, MII_BMCR);
-
- /* Set default phyparam */
- ipg_set_phy_default_param(sp->pdev->revision, dev, phyaddr);
-
- /* Reset PHY */
- mii_phyctrl |= BMCR_RESET | BMCR_ANRESTART;
- mdio_write(dev, phyaddr, MII_BMCR, mii_phyctrl);
-
- }
-}
-
-static int ipg_hw_init(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- void __iomem *ioaddr = sp->ioaddr;
- unsigned int i;
- int rc;
-
- /* Read/Write and Reset EEPROM Value */
- /* Read LED Mode Configuration from EEPROM */
- sp->led_mode = read_eeprom(dev, 6);
-
- /* Reset all functions within the IPG. Do not assert
- * RST_OUT as not compatible with some PHYs.
- */
- rc = ipg_reset(dev, IPG_RESET_MASK);
- if (rc < 0)
- goto out;
-
- ipg_init_mii(dev);
-
- /* Read MAC Address from EEPROM */
- for (i = 0; i < 3; i++)
- sp->station_addr[i] = read_eeprom(dev, 16 + i);
-
- for (i = 0; i < 3; i++)
- ipg_w16(sp->station_addr[i], STATION_ADDRESS_0 + 2*i);
-
- /* Set station address in ethernet_device structure. */
- dev->dev_addr[0] = ipg_r16(STATION_ADDRESS_0) & 0x00ff;
- dev->dev_addr[1] = (ipg_r16(STATION_ADDRESS_0) & 0xff00) >> 8;
- dev->dev_addr[2] = ipg_r16(STATION_ADDRESS_1) & 0x00ff;
- dev->dev_addr[3] = (ipg_r16(STATION_ADDRESS_1) & 0xff00) >> 8;
- dev->dev_addr[4] = ipg_r16(STATION_ADDRESS_2) & 0x00ff;
- dev->dev_addr[5] = (ipg_r16(STATION_ADDRESS_2) & 0xff00) >> 8;
-out:
- return rc;
-}
-
-static int ipg_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- int rc;
-
- mutex_lock(&sp->mii_mutex);
- rc = generic_mii_ioctl(&sp->mii_if, if_mii(ifr), cmd, NULL);
- mutex_unlock(&sp->mii_mutex);
-
- return rc;
-}
-
-static int ipg_nic_change_mtu(struct net_device *dev, int new_mtu)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- int err;
-
- /* Function to accommodate changes to Maximum Transfer Unit
- * (or MTU) of IPG NIC. Cannot use default function since
- * the default will not allow for MTU > 1500 bytes.
- */
-
- IPG_DEBUG_MSG("_nic_change_mtu\n");
-
- /*
-	 * Check that the new MTU value is between 68 (the minimum MTU
-	 * required by IPv4) and 10 KB, which is the largest supported MTU.
- */
- if (new_mtu < 68 || new_mtu > 10240)
- return -EINVAL;
-
- err = ipg_nic_stop(dev);
- if (err)
- return err;
-
- dev->mtu = new_mtu;
-
- sp->max_rxframe_size = new_mtu;
-
- sp->rxfrag_size = new_mtu;
- if (sp->rxfrag_size > 4088)
- sp->rxfrag_size = 4088;
-
- sp->rxsupport_size = sp->max_rxframe_size;
-
- if (new_mtu > 0x0600)
- sp->is_jumbo = true;
- else
- sp->is_jumbo = false;
-
- return ipg_nic_open(dev);
-}
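
Two thresholds drive the sizing above: one RFD buffer carries at most 4088 bytes, and anything over 0x0600 (1536) bytes is routed through the multi-RFD jumbo path. A sketch of the derived fragment size, assuming those constants:

    /* Per-RFD fragment size for a given MTU, as ipg_nic_change_mtu()
     * computes it; MTUs above 1536 additionally flip the driver into
     * jumbo mode, and a frame then arrives as roughly mtu / 4088
     * fragments that ipg_nic_rx_jumbo() reassembles.
     */
    static unsigned int rxfrag_for_mtu(unsigned int mtu)
    {
    	return mtu > 4088 ? 4088 : mtu;
    }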
-
-static int ipg_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- int rc;
-
- mutex_lock(&sp->mii_mutex);
- rc = mii_ethtool_gset(&sp->mii_if, cmd);
- mutex_unlock(&sp->mii_mutex);
-
- return rc;
-}
-
-static int ipg_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- int rc;
-
- mutex_lock(&sp->mii_mutex);
- rc = mii_ethtool_sset(&sp->mii_if, cmd);
- mutex_unlock(&sp->mii_mutex);
-
- return rc;
-}
-
-static int ipg_nway_reset(struct net_device *dev)
-{
- struct ipg_nic_private *sp = netdev_priv(dev);
- int rc;
-
- mutex_lock(&sp->mii_mutex);
- rc = mii_nway_restart(&sp->mii_if);
- mutex_unlock(&sp->mii_mutex);
-
- return rc;
-}
-
-static const struct ethtool_ops ipg_ethtool_ops = {
- .get_settings = ipg_get_settings,
- .set_settings = ipg_set_settings,
- .nway_reset = ipg_nway_reset,
-};
-
-static void ipg_remove(struct pci_dev *pdev)
-{
- struct net_device *dev = pci_get_drvdata(pdev);
- struct ipg_nic_private *sp = netdev_priv(dev);
-
- IPG_DEBUG_MSG("_remove\n");
-
- /* Un-register Ethernet device. */
- unregister_netdev(dev);
-
- pci_iounmap(pdev, sp->ioaddr);
-
- pci_release_regions(pdev);
-
- free_netdev(dev);
- pci_disable_device(pdev);
-}
-
-static const struct net_device_ops ipg_netdev_ops = {
- .ndo_open = ipg_nic_open,
- .ndo_stop = ipg_nic_stop,
- .ndo_start_xmit = ipg_nic_hard_start_xmit,
- .ndo_get_stats = ipg_nic_get_stats,
- .ndo_set_rx_mode = ipg_nic_set_multicast_list,
- .ndo_do_ioctl = ipg_ioctl,
- .ndo_tx_timeout = ipg_tx_timeout,
- .ndo_change_mtu = ipg_nic_change_mtu,
- .ndo_set_mac_address = eth_mac_addr,
- .ndo_validate_addr = eth_validate_addr,
-};
-
-static int ipg_probe(struct pci_dev *pdev, const struct pci_device_id *id)
-{
- unsigned int i = id->driver_data;
- struct ipg_nic_private *sp;
- struct net_device *dev;
- void __iomem *ioaddr;
- int rc;
-
- rc = pci_enable_device(pdev);
- if (rc < 0)
- goto out;
-
- pr_info("%s: %s\n", pci_name(pdev), ipg_brand_name[i]);
-
- pci_set_master(pdev);
-
- rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
- if (rc < 0) {
- rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
- if (rc < 0) {
- pr_err("%s: DMA config failed\n", pci_name(pdev));
- goto err_disable_0;
- }
- }
-
- /*
- * Initialize net device.
- */
- dev = alloc_etherdev(sizeof(struct ipg_nic_private));
- if (!dev) {
- rc = -ENOMEM;
- goto err_disable_0;
- }
-
- sp = netdev_priv(dev);
- spin_lock_init(&sp->lock);
- mutex_init(&sp->mii_mutex);
-
- sp->is_jumbo = IPG_IS_JUMBO;
- sp->rxfrag_size = IPG_RXFRAG_SIZE;
- sp->rxsupport_size = IPG_RXSUPPORT_SIZE;
- sp->max_rxframe_size = IPG_MAX_RXFRAME_SIZE;
-
- /* Declare IPG NIC functions for Ethernet device methods.
- */
- dev->netdev_ops = &ipg_netdev_ops;
- SET_NETDEV_DEV(dev, &pdev->dev);
- dev->ethtool_ops = &ipg_ethtool_ops;
-
- rc = pci_request_regions(pdev, DRV_NAME);
- if (rc)
- goto err_free_dev_1;
-
- ioaddr = pci_iomap(pdev, 1, pci_resource_len(pdev, 1));
- if (!ioaddr) {
- pr_err("%s: cannot map MMIO\n", pci_name(pdev));
- rc = -EIO;
- goto err_release_regions_2;
- }
-
- /* Save the pointer to the PCI device information. */
- sp->ioaddr = ioaddr;
- sp->pdev = pdev;
- sp->dev = dev;
-
- INIT_DELAYED_WORK(&sp->task, ipg_reset_after_host_error);
-
- pci_set_drvdata(pdev, dev);
-
- rc = ipg_hw_init(dev);
- if (rc < 0)
- goto err_unmap_3;
-
- rc = register_netdev(dev);
- if (rc < 0)
- goto err_unmap_3;
-
- netdev_info(dev, "Ethernet device registered\n");
-out:
- return rc;
-
-err_unmap_3:
- pci_iounmap(pdev, ioaddr);
-err_release_regions_2:
- pci_release_regions(pdev);
-err_free_dev_1:
- free_netdev(dev);
-err_disable_0:
- pci_disable_device(pdev);
- goto out;
-}
-
-static struct pci_driver ipg_pci_driver = {
- .name = IPG_DRIVER_NAME,
- .id_table = ipg_pci_tbl,
- .probe = ipg_probe,
- .remove = ipg_remove,
-};
-
-module_pci_driver(ipg_pci_driver);
diff --git a/drivers/net/ethernet/icplus/ipg.h b/drivers/net/ethernet/icplus/ipg.h
deleted file mode 100644
index de606281f97b..000000000000
--- a/drivers/net/ethernet/icplus/ipg.h
+++ /dev/null
@@ -1,748 +0,0 @@
-/*
- * Include file for Gigabit Ethernet device driver for Network
- * Interface Cards (NICs) utilizing the Tamarack Microelectronics
- * Inc. IPG Gigabit or Triple Speed Ethernet Media Access
- * Controller.
- */
-#ifndef __LINUX_IPG_H
-#define __LINUX_IPG_H
-
-#include <linux/module.h>
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/ioport.h>
-#include <linux/errno.h>
-#include <asm/io.h>
-#include <linux/delay.h>
-#include <linux/types.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <asm/bitops.h>
-
-/*
- * Constants
- */
-
-/* GMII based PHY IDs */
-#define NS 0x2000
-#define MARVELL 0x0141
-#define ICPLUS_PHY 0x243
-
-/* NIC Physical Layer Device MII register fields. */
-#define MII_PHY_SELECTOR_IEEE8023 0x0001
-#define MII_PHY_TECHABILITYFIELD 0x1FE0
-
-/* GMII_PHY_1000 needs to be set to prefer master */
-#define GMII_PHY_1000BASETCONTROL_PreferMaster 0x0400
-
-/* NIC Physical Layer Device GMII constants. */
-#define GMII_PREAMBLE 0xFFFFFFFF
-#define GMII_ST 0x1
-#define GMII_READ 0x2
-#define GMII_WRITE 0x1
-#define GMII_TA_READ_MASK 0x1
-#define GMII_TA_WRITE 0x2
-
-/* I/O register offsets. */
-enum ipg_regs {
- DMA_CTRL = 0x00,
- RX_DMA_STATUS = 0x08, /* Unused + reserved */
- TFD_LIST_PTR_0 = 0x10,
- TFD_LIST_PTR_1 = 0x14,
- TX_DMA_BURST_THRESH = 0x18,
- TX_DMA_URGENT_THRESH = 0x19,
- TX_DMA_POLL_PERIOD = 0x1a,
- RFD_LIST_PTR_0 = 0x1c,
- RFD_LIST_PTR_1 = 0x20,
- RX_DMA_BURST_THRESH = 0x24,
- RX_DMA_URGENT_THRESH = 0x25,
- RX_DMA_POLL_PERIOD = 0x26,
- DEBUG_CTRL = 0x2c,
- ASIC_CTRL = 0x30,
- FIFO_CTRL = 0x38, /* Unused */
- FLOW_OFF_THRESH = 0x3c,
- FLOW_ON_THRESH = 0x3e,
- EEPROM_DATA = 0x48,
- EEPROM_CTRL = 0x4a,
- EXPROM_ADDR = 0x4c, /* Unused */
- EXPROM_DATA = 0x50, /* Unused */
- WAKE_EVENT = 0x51, /* Unused */
- COUNTDOWN = 0x54, /* Unused */
- INT_STATUS_ACK = 0x5a,
- INT_ENABLE = 0x5c,
- INT_STATUS = 0x5e, /* Unused */
- TX_STATUS = 0x60,
- MAC_CTRL = 0x6c,
- VLAN_TAG = 0x70, /* Unused */
- PHY_SET = 0x75,
- PHY_CTRL = 0x76,
- STATION_ADDRESS_0 = 0x78,
- STATION_ADDRESS_1 = 0x7a,
- STATION_ADDRESS_2 = 0x7c,
- MAX_FRAME_SIZE = 0x86,
- RECEIVE_MODE = 0x88,
- HASHTABLE_0 = 0x8c,
- HASHTABLE_1 = 0x90,
- RMON_STATISTICS_MASK = 0x98,
- STATISTICS_MASK = 0x9c,
- RX_JUMBO_FRAMES = 0xbc, /* Unused */
- TCP_CHECKSUM_ERRORS = 0xc0, /* Unused */
- IP_CHECKSUM_ERRORS = 0xc2, /* Unused */
- UDP_CHECKSUM_ERRORS = 0xc4, /* Unused */
- TX_JUMBO_FRAMES = 0xf4 /* Unused */
-};
-
-/* Ethernet MIB statistic register offsets. */
-#define IPG_OCTETRCVOK 0xA8
-#define IPG_MCSTOCTETRCVDOK 0xAC
-#define IPG_BCSTOCTETRCVOK 0xB0
-#define IPG_FRAMESRCVDOK 0xB4
-#define IPG_MCSTFRAMESRCVDOK 0xB8
-#define IPG_BCSTFRAMESRCVDOK 0xBE
-#define IPG_MACCONTROLFRAMESRCVD 0xC6
-#define IPG_FRAMETOOLONGERRORS 0xC8
-#define IPG_INRANGELENGTHERRORS 0xCA
-#define IPG_FRAMECHECKSEQERRORS 0xCC
-#define IPG_FRAMESLOSTRXERRORS 0xCE
-#define IPG_OCTETXMTOK 0xD0
-#define IPG_MCSTOCTETXMTOK 0xD4
-#define IPG_BCSTOCTETXMTOK 0xD8
-#define IPG_FRAMESXMTDOK 0xDC
-#define IPG_MCSTFRAMESXMTDOK 0xE0
-#define IPG_FRAMESWDEFERREDXMT 0xE4
-#define IPG_LATECOLLISIONS 0xE8
-#define IPG_MULTICOLFRAMES 0xEC
-#define IPG_SINGLECOLFRAMES 0xF0
-#define IPG_BCSTFRAMESXMTDOK 0xF6
-#define IPG_CARRIERSENSEERRORS 0xF8
-#define IPG_MACCONTROLFRAMESXMTDOK 0xFA
-#define IPG_FRAMESABORTXSCOLLS 0xFC
-#define IPG_FRAMESWEXDEFERRAL 0xFE
-
-/* RMON statistic register offsets. */
-#define IPG_ETHERSTATSCOLLISIONS 0x100
-#define IPG_ETHERSTATSOCTETSTRANSMIT 0x104
-#define IPG_ETHERSTATSPKTSTRANSMIT 0x108
-#define IPG_ETHERSTATSPKTS64OCTESTSTRANSMIT 0x10C
-#define IPG_ETHERSTATSPKTS65TO127OCTESTSTRANSMIT 0x110
-#define IPG_ETHERSTATSPKTS128TO255OCTESTSTRANSMIT 0x114
-#define IPG_ETHERSTATSPKTS256TO511OCTESTSTRANSMIT 0x118
-#define IPG_ETHERSTATSPKTS512TO1023OCTESTSTRANSMIT 0x11C
-#define IPG_ETHERSTATSPKTS1024TO1518OCTESTSTRANSMIT 0x120
-#define IPG_ETHERSTATSCRCALIGNERRORS 0x124
-#define IPG_ETHERSTATSUNDERSIZEPKTS 0x128
-#define IPG_ETHERSTATSFRAGMENTS 0x12C
-#define IPG_ETHERSTATSJABBERS 0x130
-#define IPG_ETHERSTATSOCTETS 0x134
-#define IPG_ETHERSTATSPKTS 0x138
-#define IPG_ETHERSTATSPKTS64OCTESTS 0x13C
-#define IPG_ETHERSTATSPKTS65TO127OCTESTS 0x140
-#define IPG_ETHERSTATSPKTS128TO255OCTESTS 0x144
-#define IPG_ETHERSTATSPKTS256TO511OCTESTS 0x148
-#define IPG_ETHERSTATSPKTS512TO1023OCTESTS 0x14C
-#define IPG_ETHERSTATSPKTS1024TO1518OCTESTS 0x150
-
-/* RMON statistic register equivalents. */
-#define IPG_ETHERSTATSMULTICASTPKTSTRANSMIT 0xE0
-#define IPG_ETHERSTATSBROADCASTPKTSTRANSMIT 0xF6
-#define IPG_ETHERSTATSMULTICASTPKTS 0xB8
-#define IPG_ETHERSTATSBROADCASTPKTS 0xBE
-#define IPG_ETHERSTATSOVERSIZEPKTS 0xC8
-#define IPG_ETHERSTATSDROPEVENTS 0xCE
-
-/* Serial EEPROM offsets */
-#define IPG_EEPROM_CONFIGPARAM 0x00
-#define IPG_EEPROM_ASICCTRL 0x01
-#define IPG_EEPROM_SUBSYSTEMVENDORID 0x02
-#define IPG_EEPROM_SUBSYSTEMID 0x03
-#define IPG_EEPROM_STATIONADDRESS0 0x10
-#define IPG_EEPROM_STATIONADDRESS1 0x11
-#define IPG_EEPROM_STATIONADDRESS2 0x12
-
-/* Register & data structure bit masks */
-
-/* PCI register masks. */
-
-/* IOBaseAddress */
-#define IPG_PIB_RSVD_MASK 0xFFFFFE01
-#define IPG_PIB_IOBASEADDRESS 0xFFFFFF00
-#define IPG_PIB_IOBASEADDRIND 0x00000001
-
-/* MemBaseAddress */
-#define IPG_PMB_RSVD_MASK 0xFFFFFE07
-#define IPG_PMB_MEMBASEADDRIND 0x00000001
-#define IPG_PMB_MEMMAPTYPE 0x00000006
-#define IPG_PMB_MEMMAPTYPE0 0x00000002
-#define IPG_PMB_MEMMAPTYPE1 0x00000004
-#define IPG_PMB_MEMBASEADDRESS 0xFFFFFE00
-
-/* ConfigStatus */
-#define IPG_CS_RSVD_MASK 0xFFB0
-#define IPG_CS_CAPABILITIES 0x0010
-#define IPG_CS_66MHZCAPABLE 0x0020
-#define IPG_CS_FASTBACK2BACK 0x0080
-#define IPG_CS_DATAPARITYREPORTED 0x0100
-#define IPG_CS_DEVSELTIMING 0x0600
-#define IPG_CS_SIGNALEDTARGETABORT 0x0800
-#define IPG_CS_RECEIVEDTARGETABORT 0x1000
-#define IPG_CS_RECEIVEDMASTERABORT 0x2000
-#define IPG_CS_SIGNALEDSYSTEMERROR 0x4000
-#define IPG_CS_DETECTEDPARITYERROR 0x8000
-
-/* TFD data structure masks. */
-
-/* TFDList, TFC */
-#define IPG_TFC_RSVD_MASK 0x0000FFFF9FFFFFFFULL
-#define IPG_TFC_FRAMEID 0x000000000000FFFFULL
-#define IPG_TFC_WORDALIGN 0x0000000000030000ULL
-#define IPG_TFC_WORDALIGNTODWORD 0x0000000000000000ULL
-#define IPG_TFC_WORDALIGNTOWORD 0x0000000000020000ULL
-#define IPG_TFC_WORDALIGNDISABLED 0x0000000000030000ULL
-#define IPG_TFC_TCPCHECKSUMENABLE 0x0000000000040000ULL
-#define IPG_TFC_UDPCHECKSUMENABLE 0x0000000000080000ULL
-#define IPG_TFC_IPCHECKSUMENABLE 0x0000000000100000ULL
-#define IPG_TFC_FCSAPPENDDISABLE 0x0000000000200000ULL
-#define IPG_TFC_TXINDICATE 0x0000000000400000ULL
-#define IPG_TFC_TXDMAINDICATE 0x0000000000800000ULL
-#define IPG_TFC_FRAGCOUNT 0x000000000F000000ULL
-#define IPG_TFC_VLANTAGINSERT 0x0000000010000000ULL
-#define IPG_TFC_TFDDONE 0x0000000080000000ULL
-#define IPG_TFC_VID 0x00000FFF00000000ULL
-#define IPG_TFC_CFI 0x0000100000000000ULL
-#define IPG_TFC_USERPRIORITY 0x0000E00000000000ULL
-
-/* TFDList, FragInfo */
-#define IPG_TFI_RSVD_MASK 0xFFFF00FFFFFFFFFFULL
-#define IPG_TFI_FRAGADDR 0x000000FFFFFFFFFFULL
-#define IPG_TFI_FRAGLEN 0xFFFF000000000000ULL
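
Both FragInfo layouts split one 64-bit word the same way: bits 0-39 carry the DMA address and bits 48-63 carry the fragment length, which is why the data paths mask with ~IPG_TFI_FRAGLEN / ~IPG_RFI_FRAGLEN before unmapping. A sketch of the packing under that layout:

    /* Combine a DMA address and a length into a FragInfo word, and
     * recover the address, matching the masks above.
     */
    static u64 frag_info_pack(u64 dma_addr, u16 len)
    {
    	return (dma_addr & IPG_TFI_FRAGADDR) | ((u64)len << 48);
    }

    static u64 frag_info_addr(u64 frag_info)
    {
    	return frag_info & ~IPG_TFI_FRAGLEN;	/* as the unmap paths do */
    }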
-
-/* RFD data structure masks. */
-
-/* RFDList, RFS */
-#define IPG_RFS_RSVD_MASK 0x0000FFFFFFFFFFFFULL
-#define IPG_RFS_RXFRAMELEN 0x000000000000FFFFULL
-#define IPG_RFS_RXFIFOOVERRUN 0x0000000000010000ULL
-#define IPG_RFS_RXRUNTFRAME 0x0000000000020000ULL
-#define IPG_RFS_RXALIGNMENTERROR 0x0000000000040000ULL
-#define IPG_RFS_RXFCSERROR 0x0000000000080000ULL
-#define IPG_RFS_RXOVERSIZEDFRAME 0x0000000000100000ULL
-#define IPG_RFS_RXLENGTHERROR 0x0000000000200000ULL
-#define IPG_RFS_VLANDETECTED 0x0000000000400000ULL
-#define IPG_RFS_TCPDETECTED 0x0000000000800000ULL
-#define IPG_RFS_TCPERROR 0x0000000001000000ULL
-#define IPG_RFS_UDPDETECTED 0x0000000002000000ULL
-#define IPG_RFS_UDPERROR 0x0000000004000000ULL
-#define IPG_RFS_IPDETECTED 0x0000000008000000ULL
-#define IPG_RFS_IPERROR 0x0000000010000000ULL
-#define IPG_RFS_FRAMESTART 0x0000000020000000ULL
-#define IPG_RFS_FRAMEEND 0x0000000040000000ULL
-#define IPG_RFS_RFDDONE 0x0000000080000000ULL
-#define IPG_RFS_TCI 0x0000FFFF00000000ULL
-
-/* RFDList, FragInfo */
-#define IPG_RFI_RSVD_MASK 0xFFFF00FFFFFFFFFFULL
-#define IPG_RFI_FRAGADDR 0x000000FFFFFFFFFFULL
-#define IPG_RFI_FRAGLEN 0xFFFF000000000000ULL
-
-/* I/O Register masks. */
-
-/* RMON Statistics Mask */
-#define IPG_RZ_ALL 0x0FFFFFFF
-
-/* Statistics Mask */
-#define IPG_SM_ALL 0x0FFFFFFF
-#define IPG_SM_OCTETRCVOK_FRAMESRCVDOK 0x00000001
-#define IPG_SM_MCSTOCTETRCVDOK_MCSTFRAMESRCVDOK 0x00000002
-#define IPG_SM_BCSTOCTETRCVDOK_BCSTFRAMESRCVDOK 0x00000004
-#define IPG_SM_RXJUMBOFRAMES 0x00000008
-#define IPG_SM_TCPCHECKSUMERRORS 0x00000010
-#define IPG_SM_IPCHECKSUMERRORS 0x00000020
-#define IPG_SM_UDPCHECKSUMERRORS 0x00000040
-#define IPG_SM_MACCONTROLFRAMESRCVD 0x00000080
-#define IPG_SM_FRAMESTOOLONGERRORS 0x00000100
-#define IPG_SM_INRANGELENGTHERRORS 0x00000200
-#define IPG_SM_FRAMECHECKSEQERRORS 0x00000400
-#define IPG_SM_FRAMESLOSTRXERRORS 0x00000800
-#define IPG_SM_OCTETXMTOK_FRAMESXMTOK 0x00001000
-#define IPG_SM_MCSTOCTETXMTOK_MCSTFRAMESXMTDOK 0x00002000
-#define IPG_SM_BCSTOCTETXMTOK_BCSTFRAMESXMTDOK 0x00004000
-#define IPG_SM_FRAMESWDEFERREDXMT 0x00008000
-#define IPG_SM_LATECOLLISIONS 0x00010000
-#define IPG_SM_MULTICOLFRAMES 0x00020000
-#define IPG_SM_SINGLECOLFRAMES 0x00040000
-#define IPG_SM_TXJUMBOFRAMES 0x00080000
-#define IPG_SM_CARRIERSENSEERRORS 0x00100000
-#define IPG_SM_MACCONTROLFRAMESXMTD 0x00200000
-#define IPG_SM_FRAMESABORTXSCOLLS 0x00400000
-#define IPG_SM_FRAMESWEXDEFERAL 0x00800000
-
-/* Countdown */
-#define IPG_CD_RSVD_MASK 0x0700FFFF
-#define IPG_CD_COUNT 0x0000FFFF
-#define IPG_CD_COUNTDOWNSPEED 0x01000000
-#define IPG_CD_COUNTDOWNMODE 0x02000000
-#define IPG_CD_COUNTINTENABLED 0x04000000
-
-/* TxDMABurstThresh */
-#define IPG_TB_RSVD_MASK 0xFF
-
-/* TxDMAUrgentThresh */
-#define IPG_TU_RSVD_MASK 0xFF
-
-/* TxDMAPollPeriod */
-#define IPG_TP_RSVD_MASK 0xFF
-
-/* RxDMAUrgentThresh */
-#define IPG_RU_RSVD_MASK 0xFF
-
-/* RxDMAPollPeriod */
-#define IPG_RP_RSVD_MASK 0xFF
-
-/* ReceiveMode */
-#define IPG_RM_RSVD_MASK 0x3F
-#define IPG_RM_RECEIVEUNICAST 0x01
-#define IPG_RM_RECEIVEMULTICAST 0x02
-#define IPG_RM_RECEIVEBROADCAST 0x04
-#define IPG_RM_RECEIVEALLFRAMES 0x08
-#define IPG_RM_RECEIVEMULTICASTHASH 0x10
-#define IPG_RM_RECEIVEIPMULTICAST 0x20
-
-/* PhySet */
-#define IPG_PS_MEM_LENB9B 0x01
-#define IPG_PS_MEM_LEN9 0x02
-#define IPG_PS_NON_COMPDET 0x04
-
-/* PhyCtrl */
-#define IPG_PC_RSVD_MASK 0xFF
-#define IPG_PC_MGMTCLK_LO 0x00
-#define IPG_PC_MGMTCLK_HI 0x01
-#define IPG_PC_MGMTCLK 0x01
-#define IPG_PC_MGMTDATA 0x02
-#define IPG_PC_MGMTDIR 0x04
-#define IPG_PC_DUPLEX_POLARITY 0x08
-#define IPG_PC_DUPLEX_STATUS 0x10
-#define IPG_PC_LINK_POLARITY 0x20
-#define IPG_PC_LINK_SPEED 0xC0
-#define IPG_PC_LINK_SPEED_10MBPS 0x40
-#define IPG_PC_LINK_SPEED_100MBPS 0x80
-#define IPG_PC_LINK_SPEED_1000MBPS 0xC0
-
-/* DMACtrl */
-#define IPG_DC_RSVD_MASK 0xC07D9818
-#define IPG_DC_RX_DMA_COMPLETE 0x00000008
-#define IPG_DC_RX_DMA_POLL_NOW 0x00000010
-#define IPG_DC_TX_DMA_COMPLETE 0x00000800
-#define IPG_DC_TX_DMA_POLL_NOW 0x00001000
-#define IPG_DC_TX_DMA_IN_PROG 0x00008000
-#define IPG_DC_RX_EARLY_DISABLE 0x00010000
-#define IPG_DC_MWI_DISABLE 0x00040000
-#define IPG_DC_TX_WRITE_BACK_DISABLE 0x00080000
-#define IPG_DC_TX_BURST_LIMIT 0x00700000
-#define IPG_DC_TARGET_ABORT 0x40000000
-#define IPG_DC_MASTER_ABORT 0x80000000
-
-/* ASICCtrl */
-#define IPG_AC_RSVD_MASK 0x07FFEFF2
-#define IPG_AC_EXP_ROM_SIZE 0x00000002
-#define IPG_AC_PHY_SPEED10 0x00000010
-#define IPG_AC_PHY_SPEED100 0x00000020
-#define IPG_AC_PHY_SPEED1000 0x00000040
-#define IPG_AC_PHY_MEDIA 0x00000080
-#define IPG_AC_FORCED_CFG 0x00000700
-#define IPG_AC_D3RESETDISABLE 0x00000800
-#define IPG_AC_SPEED_UP_MODE 0x00002000
-#define IPG_AC_LED_MODE 0x00004000
-#define IPG_AC_RST_OUT_POLARITY 0x00008000
-#define IPG_AC_GLOBAL_RESET 0x00010000
-#define IPG_AC_RX_RESET 0x00020000
-#define IPG_AC_TX_RESET 0x00040000
-#define IPG_AC_DMA 0x00080000
-#define IPG_AC_FIFO 0x00100000
-#define IPG_AC_NETWORK 0x00200000
-#define IPG_AC_HOST 0x00400000
-#define IPG_AC_AUTO_INIT 0x00800000
-#define IPG_AC_RST_OUT 0x01000000
-#define IPG_AC_INT_REQUEST 0x02000000
-#define IPG_AC_RESET_BUSY 0x04000000
-#define IPG_AC_LED_SPEED 0x08000000
-#define IPG_AC_LED_MODE_BIT_1 0x20000000
-
-/* EepromCtrl */
-#define IPG_EC_RSVD_MASK 0x83FF
-#define IPG_EC_EEPROM_ADDR 0x00FF
-#define IPG_EC_EEPROM_OPCODE 0x0300
-#define IPG_EC_EEPROM_SUBCOMMAD 0x0000
-#define IPG_EC_EEPROM_WRITEOPCODE 0x0100
-#define IPG_EC_EEPROM_READOPCODE 0x0200
-#define IPG_EC_EEPROM_ERASEOPCODE 0x0300
-#define IPG_EC_EEPROM_BUSY 0x8000
-
-/* FIFOCtrl */
-#define IPG_FC_RSVD_MASK 0xC001
-#define IPG_FC_RAM_TEST_MODE 0x0001
-#define IPG_FC_TRANSMITTING 0x4000
-#define IPG_FC_RECEIVING 0x8000
-
-/* TxStatus */
-#define IPG_TS_RSVD_MASK 0xFFFF00DD
-#define IPG_TS_TX_ERROR 0x00000001
-#define IPG_TS_LATE_COLLISION 0x00000004
-#define IPG_TS_TX_MAX_COLL 0x00000008
-#define IPG_TS_TX_UNDERRUN 0x00000010
-#define IPG_TS_TX_IND_REQD 0x00000040
-#define IPG_TS_TX_COMPLETE 0x00000080
-#define IPG_TS_TX_FRAMEID 0xFFFF0000
-
-/* WakeEvent */
-#define IPG_WE_WAKE_PKT_ENABLE 0x01
-#define IPG_WE_MAGIC_PKT_ENABLE 0x02
-#define IPG_WE_LINK_EVT_ENABLE 0x04
-#define IPG_WE_WAKE_POLARITY 0x08
-#define IPG_WE_WAKE_PKT_EVT 0x10
-#define IPG_WE_MAGIC_PKT_EVT 0x20
-#define IPG_WE_LINK_EVT 0x40
-#define IPG_WE_WOL_ENABLE 0x80
-
-/* IntEnable */
-#define IPG_IE_RSVD_MASK 0x1FFE
-#define IPG_IE_HOST_ERROR 0x0002
-#define IPG_IE_TX_COMPLETE 0x0004
-#define IPG_IE_MAC_CTRL_FRAME 0x0008
-#define IPG_IE_RX_COMPLETE 0x0010
-#define IPG_IE_RX_EARLY 0x0020
-#define IPG_IE_INT_REQUESTED 0x0040
-#define IPG_IE_UPDATE_STATS 0x0080
-#define IPG_IE_LINK_EVENT 0x0100
-#define IPG_IE_TX_DMA_COMPLETE 0x0200
-#define IPG_IE_RX_DMA_COMPLETE 0x0400
-#define IPG_IE_RFD_LIST_END 0x0800
-#define IPG_IE_RX_DMA_PRIORITY 0x1000
-
-/* IntStatus */
-#define IPG_IS_RSVD_MASK 0x1FFF
-#define IPG_IS_INTERRUPT_STATUS 0x0001
-#define IPG_IS_HOST_ERROR 0x0002
-#define IPG_IS_TX_COMPLETE 0x0004
-#define IPG_IS_MAC_CTRL_FRAME 0x0008
-#define IPG_IS_RX_COMPLETE 0x0010
-#define IPG_IS_RX_EARLY 0x0020
-#define IPG_IS_INT_REQUESTED 0x0040
-#define IPG_IS_UPDATE_STATS 0x0080
-#define IPG_IS_LINK_EVENT 0x0100
-#define IPG_IS_TX_DMA_COMPLETE 0x0200
-#define IPG_IS_RX_DMA_COMPLETE 0x0400
-#define IPG_IS_RFD_LIST_END 0x0800
-#define IPG_IS_RX_DMA_PRIORITY 0x1000
-
-/* MACCtrl */
-#define IPG_MC_RSVD_MASK 0x7FE33FA3
-#define IPG_MC_IFS_SELECT 0x00000003
-#define IPG_MC_IFS_4352BIT 0x00000003
-#define IPG_MC_IFS_1792BIT 0x00000002
-#define IPG_MC_IFS_1024BIT 0x00000001
-#define IPG_MC_IFS_96BIT 0x00000000
-#define IPG_MC_DUPLEX_SELECT 0x00000020
-#define IPG_MC_DUPLEX_SELECT_FD 0x00000020
-#define IPG_MC_DUPLEX_SELECT_HD 0x00000000
-#define IPG_MC_TX_FLOW_CONTROL_ENABLE 0x00000080
-#define IPG_MC_RX_FLOW_CONTROL_ENABLE 0x00000100
-#define IPG_MC_RCV_FCS 0x00000200
-#define IPG_MC_FIFO_LOOPBACK 0x00000400
-#define IPG_MC_MAC_LOOPBACK 0x00000800
-#define IPG_MC_AUTO_VLAN_TAGGING 0x00001000
-#define IPG_MC_AUTO_VLAN_UNTAGGING 0x00002000
-#define IPG_MC_COLLISION_DETECT 0x00010000
-#define IPG_MC_CARRIER_SENSE 0x00020000
-#define IPG_MC_STATISTICS_ENABLE 0x00200000
-#define IPG_MC_STATISTICS_DISABLE 0x00400000
-#define IPG_MC_STATISTICS_ENABLED 0x00800000
-#define IPG_MC_TX_ENABLE 0x01000000
-#define IPG_MC_TX_DISABLE 0x02000000
-#define IPG_MC_TX_ENABLED 0x04000000
-#define IPG_MC_RX_ENABLE 0x08000000
-#define IPG_MC_RX_DISABLE 0x10000000
-#define IPG_MC_RX_ENABLED 0x20000000
-#define IPG_MC_PAUSED 0x40000000
-
-/*
- * Tune
- */
-
-/* Assign IPG_APPEND_FCS_ON_TX > 0 for auto FCS append on TX. */
-#define IPG_APPEND_FCS_ON_TX 1
-
-/* Assign IPG_STRIP_FCS_ON_RX > 0 for auto FCS strip on RX. */
-#define IPG_STRIP_FCS_ON_RX 1
-
-/* Assign IPG_DROP_ON_RX_ETH_ERRORS > 0 to drop RX frames with
- * Ethernet errors.
- */
-#define IPG_DROP_ON_RX_ETH_ERRORS 1
-
-/* Assign IPG_INSERT_MANUAL_VLAN_TAG > 0 to insert VLAN tags manually
- * (via TFC).
- */
-#define IPG_INSERT_MANUAL_VLAN_TAG 0
-
-/* Assign IPG_ADD_IPCHECKSUM_ON_TX > 0 for auto IP checksum on TX. */
-#define IPG_ADD_IPCHECKSUM_ON_TX 0
-
-/* Assign IPG_ADD_TCPCHECKSUM_ON_TX > 0 for auto TCP checksum on TX.
- * DO NOT USE FOR SILICON REVISIONS B3 AND EARLIER.
- */
-#define IPG_ADD_TCPCHECKSUM_ON_TX 0
-
-/* Assign IPG_ADD_UDPCHECKSUM_ON_TX > 0 for auto UDP checksum on TX.
- * DO NOT USE FOR SILICON REVISIONS B3 AND EARLIER.
- */
-#define IPG_ADD_UDPCHECKSUM_ON_TX 0
-
-/* If inserting VLAN tags manually, assign the IPG_MANUAL_VLAN_xx
- * constants as desired.
- */
-#define IPG_MANUAL_VLAN_VID 0xABC
-#define IPG_MANUAL_VLAN_CFI 0x1
-#define IPG_MANUAL_VLAN_USERPRIORITY 0x5
-
-#define IPG_IO_REG_RANGE 0xFF
-#define IPG_MEM_REG_RANGE 0x154
-#define IPG_DRIVER_NAME "Sundance Technology IPG Triple-Speed Ethernet"
-#define IPG_NIC_PHY_ADDRESS 0x01
-#define IPG_DMALIST_ALIGN_PAD 0x07
-#define IPG_MULTICAST_HASHTABLE_SIZE 0x40
-
-/* Number of milliseconds to wait after issuing a software reset.
- * IPG_AC_RESETWAIT must be >= 0x05 for proper 10Mbps operation.
- */
-#define IPG_AC_RESETWAIT 0x05
-
-/* Number of IPG_AC_RESETWAIT time periods before declaring a timeout. */
-#define IPG_AC_RESET_TIMEOUT 0x0A
-
-/* Minimum number of nanoseconds used to toggle MDC clock during
- * MII/GMII register access.
- */
-#define IPG_PC_PHYCTRLWAIT_NS 200
-
-#define IPG_TFDLIST_LENGTH 0x100
-
-/* Number of frames between TxDMAComplete interrupts.
- * 0 < IPG_FRAMESBETWEENTXDMACOMPLETES <= IPG_TFDLIST_LENGTH
- */
-#define IPG_FRAMESBETWEENTXDMACOMPLETES 0x1
-
-#define IPG_RFDLIST_LENGTH 0x100
-
-/* Maximum number of RFDs to process per interrupt.
- * 1 < IPG_MAXRFDPROCESS_COUNT < IPG_RFDLIST_LENGTH
- */
-#define IPG_MAXRFDPROCESS_COUNT 0x80
-
-/* Minimum margin between the last freed RFD and the current RFD.
- * 1 < IPG_MINUSEDRFDSTOFREE < IPG_RFDLIST_LENGTH
- */
-#define IPG_MINUSEDRFDSTOFREE 0x80
-
-/* Maximum jumbo frame size, in units of 0x600 bytes (the rx_buffer
- * size that one RFD can carry).
- */
-#define MAX_JUMBOSIZE 0x8 /* max is 12K */
-
-/* Key register values loaded at driver start up. */
-
-/* TXDMAPollPeriod is specified in 320ns increments.
- *
- * Value Time
- * ---------------------
- * 0x00-0x01 320ns
- * 0x03 ~1us
- * 0x1F ~10us
- * 0xFF ~82us
- */
-#define IPG_TXDMAPOLLPERIOD_VALUE 0x26
-
-/* TxDMAUrgentThresh specifies the minimum amount of
- * data in the transmit FIFO before asserting an
- * urgent transmit DMA request.
- *
- * Value Min TxFIFO occupied space before urgent TX request
- * ---------------------------------------------------------------
- * 0x00-0x04 128 bytes (1024 bits)
- * 0x27 1248 bytes (~10000 bits)
- * 0x30 1536 bytes (12288 bits)
- * 0xFF 8192 bytes (65536 bits)
- */
-#define IPG_TXDMAURGENTTHRESH_VALUE 0x04
-
-/* TxDMABurstThresh specifies the minimum amount of
- * free space in the transmit FIFO before asserting a
- * transmit DMA request.
- *
- * Value Min TxFIFO free space before TX request
- * ----------------------------------------------------
- * 0x00-0x08 256 bytes
- * 0x30 1536 bytes
- * 0xFF 8192 bytes
- */
-#define IPG_TXDMABURSTTHRESH_VALUE 0x30
-
-/* RXDMAPollPeriod is specified in 320ns increments.
- *
- * Value Time
- * ---------------------
- * 0x00-0x01 320ns
- * 0x03 ~1us
- * 0x1F ~10us
- * 0xFF ~82us
- */
-#define IPG_RXDMAPOLLPERIOD_VALUE 0x01
-
-/* RxDMAUrgentThresh specifies the minimum amount of
- * free space within the receive FIFO before asserting
- * an urgent receive DMA request.
- *
- * Value Min RxFIFO free space before urgent RX request
- * ---------------------------------------------------------------
- * 0x00-0x04 128 bytes (1024 bits)
- * 0x27 1248 bytes (~10000 bits)
- * 0x30 1536 bytes (12288 bits)
- * 0xFF 8192 bytes (65536 bits)
- */
-#define IPG_RXDMAURGENTTHRESH_VALUE 0x30
-
-/* RxDMABurstThresh specifies the minimum amount of
- * occupied space within the receive FIFO before asserting
- * a receive DMA request.
- *
- * Value Min RxFIFO occupied space before RX request
- * ----------------------------------------------------
- * 0x00-0x08 256 bytes
- * 0x30 1536 bytes
- * 0xFF 8192 bytes
- */
-#define IPG_RXDMABURSTTHRESH_VALUE 0x30
-
-/* FlowOnThresh specifies the maximum amount of occupied
- * space in the receive FIFO before a PAUSE frame with
- * maximum pause time is transmitted.
- *
- * Value Max RxFIFO occupied space before PAUSE
- * ---------------------------------------------------
- * 0x0000 0 bytes
- * 0x0740 29,696 bytes
- * 0x07FF 32,752 bytes
- */
-#define IPG_FLOWONTHRESH_VALUE 0x0740
-
-/* FlowOffThresh specifies the minimum amount of occupied
- * space in the receive FIFO before a PAUSE frame with
- * zero pause time is transmitted.
- *
- * Value Min RxFIFO occupied space before PAUSE release
- * ---------------------------------------------------
- * 0x0000 0 bytes
- * 0x00BF 3056 bytes
- * 0x07FF 32,752 bytes
- */
-#define IPG_FLOWOFFTHRESH_VALUE 0x00BF
-
-/*
- * Miscellaneous macros.
- */
-
-/* Macros for printing debug statements. */
-#ifdef IPG_DEBUG
-# define IPG_DEBUG_MSG(fmt, args...) \
-do { \
- if (0) \
- printk(KERN_DEBUG "IPG: " fmt, ##args); \
-} while (0)
-# define IPG_DDEBUG_MSG(fmt, args...) \
- printk(KERN_DEBUG "IPG: " fmt, ##args)
-# define IPG_DUMPRFDLIST(args) ipg_dump_rfdlist(args)
-# define IPG_DUMPTFDLIST(args) ipg_dump_tfdlist(args)
-#else
-# define IPG_DEBUG_MSG(fmt, args...) \
-do { \
- if (0) \
- printk(KERN_DEBUG "IPG: " fmt, ##args); \
-} while (0)
-# define IPG_DDEBUG_MSG(fmt, args...) \
-do { \
- if (0) \
- printk(KERN_DEBUG "IPG: " fmt, ##args); \
-} while (0)
-# define IPG_DUMPRFDLIST(args)
-# define IPG_DUMPTFDLIST(args)
-#endif
-
-/*
- * End miscellaneous macros.
- */
-
-/* Transmit Frame Descriptor. The IPG supports 15 fragments;
- * however, Linux requires only a single fragment. Note, each
- * TFD field is 64 bits wide.
- */
-struct ipg_tx {
- __le64 next_desc;
- __le64 tfc;
- __le64 frag_info;
-};
-
-/* Receive Frame Descriptor. Note, each RFD field is 64 bits wide.
- */
-struct ipg_rx {
- __le64 next_desc;
- __le64 rfs;
- __le64 frag_info;
-};
-
-struct ipg_jumbo {
- int found_start;
- int current_size;
- struct sk_buff *skb;
-};
-
-/* Structure of IPG NIC specific data. */
-struct ipg_nic_private {
- void __iomem *ioaddr;
- struct ipg_tx *txd;
- struct ipg_rx *rxd;
- dma_addr_t txd_map;
- dma_addr_t rxd_map;
- struct sk_buff *tx_buff[IPG_TFDLIST_LENGTH];
- struct sk_buff *rx_buff[IPG_RFDLIST_LENGTH];
- unsigned int tx_current;
- unsigned int tx_dirty;
- unsigned int rx_current;
- unsigned int rx_dirty;
- bool is_jumbo;
- struct ipg_jumbo jumbo;
- unsigned long rxfrag_size;
- unsigned long rxsupport_size;
- unsigned long max_rxframe_size;
- unsigned int rx_buf_sz;
- struct pci_dev *pdev;
- struct net_device *dev;
- struct net_device_stats stats;
- spinlock_t lock;
- int tenmbpsmode;
-
- u16 led_mode;
- u16 station_addr[3]; /* Station Address in EEPROM Reg 0x10..0x12 */
-
- struct mutex mii_mutex;
- struct mii_if_info mii_if;
- int reset_current_tfd;
-#ifdef IPG_DEBUG
- int RFDlistendCount;
- int RFDListCheckedCount;
- int EmptyRFDListCount;
-#endif
- struct delayed_work task;
-};
-
-#endif /* __LINUX_IPG_H */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 639263d5e833..7781e80896a6 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -627,8 +627,10 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev)
/* verify the skb head is not shared */
err = skb_cow_head(skb, 0);
- if (err)
+ if (err) {
+ dev_kfree_skb(skb);
return NETDEV_TX_OK;
+ }
/* locate vlan header */
vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
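
The fm10k hunk above plugs a leak: NETDEV_TX_OK tells the stack the driver consumed the skb, so an early return with that code must free the buffer itself. A hedged sketch of the pattern (function name is illustrative):

    static netdev_tx_t xmit_sketch(struct sk_buff *skb, struct net_device *dev)
    {
    	if (skb_cow_head(skb, 0)) {
    		/* NETDEV_TX_OK means "consumed": free it or it leaks. */
    		dev_kfree_skb(skb);
    		return NETDEV_TX_OK;
    	}
    	/* ... map and queue the frame ... */
    	return NETDEV_TX_OK;
    }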
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index 0ff8f01e57ee..1fd5ea82a9bc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -567,10 +567,6 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw)
goto init_adminq_exit;
}
- /* initialize locks */
- mutex_init(&hw->aq.asq_mutex);
- mutex_init(&hw->aq.arq_mutex);
-
/* Set up register offsets */
i40e_adminq_init_regs(hw);
@@ -664,8 +660,6 @@ i40e_status i40e_shutdown_adminq(struct i40e_hw *hw)
i40e_shutdown_asq(hw);
i40e_shutdown_arq(hw);
- /* destroy the locks */
-
if (hw->nvm_buff.va)
i40e_free_virt_mem(hw, &hw->nvm_buff);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index b825f978d441..4a9873ec28c7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -10295,6 +10295,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* set up a default setting for link flow control */
pf->hw.fc.requested_mode = I40E_FC_NONE;
+ /* set up the locks for the AQ, do this only once in probe
+ * and destroy them only once in remove
+ */
+ mutex_init(&hw->aq.asq_mutex);
+ mutex_init(&hw->aq.arq_mutex);
+
err = i40e_init_adminq(hw);
/* provide nvm, fw, api versions */
@@ -10697,7 +10703,6 @@ static void i40e_remove(struct pci_dev *pdev)
set_bit(__I40E_DOWN, &pf->state);
del_timer_sync(&pf->service_timer);
cancel_work_sync(&pf->service_task);
- i40e_fdir_teardown(pf);
if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
i40e_free_vfs(pf);
@@ -10740,6 +10745,10 @@ static void i40e_remove(struct pci_dev *pdev)
"Failed to destroy the Admin Queue resources: %d\n",
ret_code);
+ /* destroy the locks only once, here */
+ mutex_destroy(&hw->aq.arq_mutex);
+ mutex_destroy(&hw->aq.asq_mutex);
+
/* Clear all dynamic memory lists of rings, q_vectors, and VSIs */
i40e_clear_interrupt_scheme(pf);
for (i = 0; i < pf->num_alloc_vsi; i++) {
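
The i40e changes move mutex_init() out of i40e_init_adminq() because the admin queue can be shut down and re-initialized across resets, while a mutex must be initialized exactly once for the device's lifetime. A sketch of the resulting split, with hypothetical names:

    /* Locks follow the device lifetime, not the queue lifetime. */
    static int probe_sketch(struct hw_sketch *hw)
    {
    	mutex_init(&hw->asq_mutex);	/* once, in probe */
    	mutex_init(&hw->arq_mutex);
    	return init_adminq(hw);		/* may later be torn down and re-run */
    }

    static void remove_sketch(struct hw_sketch *hw)
    {
    	shutdown_adminq(hw);
    	mutex_destroy(&hw->arq_mutex);	/* once, in remove */
    	mutex_destroy(&hw->asq_mutex);
    }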
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
index fd123ca60761..3f65e39b3fe4 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
@@ -551,10 +551,6 @@ i40e_status i40evf_init_adminq(struct i40e_hw *hw)
goto init_adminq_exit;
}
- /* initialize locks */
- mutex_init(&hw->aq.asq_mutex);
- mutex_init(&hw->aq.arq_mutex);
-
/* Set up register offsets */
i40e_adminq_init_regs(hw);
@@ -596,8 +592,6 @@ i40e_status i40evf_shutdown_adminq(struct i40e_hw *hw)
i40e_shutdown_asq(hw);
i40e_shutdown_arq(hw);
- /* destroy the locks */
-
if (hw->nvm_buff.va)
i40e_free_virt_mem(hw, &hw->nvm_buff);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index d962164dfb0f..99d2cffae0cd 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -2476,6 +2476,12 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
hw->bus.device = PCI_SLOT(pdev->devfn);
hw->bus.func = PCI_FUNC(pdev->devfn);
+ /* set up the locks for the AQ, do this only once in probe
+ * and destroy them only once in remove
+ */
+ mutex_init(&hw->aq.asq_mutex);
+ mutex_init(&hw->aq.arq_mutex);
+
INIT_LIST_HEAD(&adapter->mac_filter_list);
INIT_LIST_HEAD(&adapter->vlan_filter_list);
@@ -2629,6 +2635,10 @@ static void i40evf_remove(struct pci_dev *pdev)
if (hw->aq.asq.count)
i40evf_shutdown_adminq(hw);
+ /* destroy the locks only once, here */
+ mutex_destroy(&hw->aq.arq_mutex);
+ mutex_destroy(&hw->aq.asq_mutex);
+
iounmap(hw->hw_addr);
pci_release_regions(pdev);
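
The four i40e/i40evf hunks above converge on one pattern: the admin-queue mutexes move out of init/shutdown paths that can run repeatedly across resets and into probe/remove, so each lock is initialized and destroyed exactly once. A minimal standalone sketch of that lifecycle — struct and function names here are hypothetical, not the drivers' API:

#include <linux/mutex.h>

struct aq_locks {
	struct mutex asq_mutex;
	struct mutex arq_mutex;
};

/* probe: runs once per device lifetime */
static void aq_locks_probe(struct aq_locks *aq)
{
	mutex_init(&aq->asq_mutex);
	mutex_init(&aq->arq_mutex);
}

/* init/shutdown may run many times (e.g. across resets) and must not
 * re-init or destroy the mutexes; they only take and release them.
 */

/* remove: runs once, after the final adminq shutdown */
static void aq_locks_remove(struct aq_locks *aq)
{
	mutex_destroy(&aq->arq_mutex);
	mutex_destroy(&aq->asq_mutex);
}
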
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 47395ff5d908..aed8d029b23d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7920,6 +7920,9 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc)
*/
if (netif_running(dev))
ixgbe_close(dev);
+ else
+ ixgbe_reset(adapter);
+
ixgbe_clear_interrupt_scheme(adapter);
#ifdef CONFIG_IXGBE_DCB
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index e84c7f2634d3..ed622fa29dfa 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -36,7 +36,7 @@
/* Registers */
#define MVNETA_RXQ_CONFIG_REG(q) (0x1400 + ((q) << 2))
-#define MVNETA_RXQ_HW_BUF_ALLOC BIT(1)
+#define MVNETA_RXQ_HW_BUF_ALLOC BIT(0)
#define MVNETA_RXQ_PKT_OFFSET_ALL_MASK (0xf << 8)
#define MVNETA_RXQ_PKT_OFFSET_MASK(offs) ((offs) << 8)
#define MVNETA_RXQ_THRESHOLD_REG(q) (0x14c0 + ((q) << 2))
@@ -62,6 +62,7 @@
#define MVNETA_WIN_SIZE(w) (0x2204 + ((w) << 3))
#define MVNETA_WIN_REMAP(w) (0x2280 + ((w) << 2))
#define MVNETA_BASE_ADDR_ENABLE 0x2290
+#define MVNETA_ACCESS_PROTECT_ENABLE 0x2294
#define MVNETA_PORT_CONFIG 0x2400
#define MVNETA_UNI_PROMISC_MODE BIT(0)
#define MVNETA_DEF_RXQ(q) ((q) << 1)
@@ -159,7 +160,7 @@
#define MVNETA_INTR_ENABLE 0x25b8
#define MVNETA_TXQ_INTR_ENABLE_ALL_MASK 0x0000ff00
-#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0xff000000
+#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0x000000ff
#define MVNETA_RXQ_CMD 0x2680
#define MVNETA_RXQ_DISABLE_SHIFT 8
@@ -242,6 +243,7 @@
#define MVNETA_VLAN_TAG_LEN 4
#define MVNETA_CPU_D_CACHE_LINE_SIZE 32
+#define MVNETA_TX_CSUM_DEF_SIZE 1600
#define MVNETA_TX_CSUM_MAX_SIZE 9800
#define MVNETA_ACC_MODE_EXT 1
@@ -1579,12 +1581,16 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
}
skb = build_skb(data, pp->frag_size > PAGE_SIZE ? 0 : pp->frag_size);
- if (!skb)
- goto err_drop_frame;
+ /* After refill, the old buffer has to be unmapped regardless
+ * of whether the skb was successfully built or not.
+ */
dma_unmap_single(dev->dev.parent, phys_addr,
MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
+ if (!skb)
+ goto err_drop_frame;
+
rcvd_pkts++;
rcvd_bytes += rx_bytes;
@@ -3191,6 +3197,7 @@ static void mvneta_conf_mbus_windows(struct mvneta_port *pp,
}
mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable);
+ mvreg_write(pp, MVNETA_ACCESS_PROTECT_ENABLE, win_protect);
}
/* Power up the port */
@@ -3250,6 +3257,7 @@ static int mvneta_probe(struct platform_device *pdev)
char hw_mac_addr[ETH_ALEN];
const char *mac_from;
const char *managed;
+ int tx_csum_limit;
int phy_mode;
int err;
int cpu;
@@ -3350,8 +3358,21 @@ static int mvneta_probe(struct platform_device *pdev)
}
}
- if (of_device_is_compatible(dn, "marvell,armada-370-neta"))
- pp->tx_csum_limit = 1600;
+ if (!of_property_read_u32(dn, "tx-csum-limit", &tx_csum_limit)) {
+ if (tx_csum_limit < 0 ||
+ tx_csum_limit > MVNETA_TX_CSUM_MAX_SIZE) {
+ tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE;
+ dev_info(&pdev->dev,
+ "Wrong TX csum limit in DT, set to %dB\n",
+ MVNETA_TX_CSUM_DEF_SIZE);
+ }
+ } else if (of_device_is_compatible(dn, "marvell,armada-370-neta")) {
+ tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE;
+ } else {
+ tx_csum_limit = MVNETA_TX_CSUM_MAX_SIZE;
+ }
+
+ pp->tx_csum_limit = tx_csum_limit;
pp->tx_ring_size = MVNETA_MAX_TXD;
pp->rx_ring_size = MVNETA_MAX_RXD;
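
The rx hunk above reorders the unmap against the build_skb() check: once the descriptor slot has been refilled, the old buffer must be unmapped even if skb construction failed, or the DMA mapping leaks. A hedged sketch of that ordering, with a placeholder helper name and parameters:

#include <linux/dma-mapping.h>
#include <linux/skbuff.h>

static struct sk_buff *rx_build_and_unmap(struct device *dev, void *data,
					  dma_addr_t phys, size_t buf_size,
					  unsigned int frag_size)
{
	struct sk_buff *skb = build_skb(data, frag_size);

	/* The old buffer must be unmapped whether or not build_skb()
	 * succeeded; unmapping only on success leaks the mapping.
	 */
	dma_unmap_single(dev, phys, buf_size, DMA_FROM_DEVICE);

	return skb;	/* NULL: drop the frame - buffer already unmapped */
}
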
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index d9884fd15b45..a4beccf1fd46 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -3413,16 +3413,23 @@ static void mvpp2_bm_pool_bufsize_set(struct mvpp2 *priv,
}
/* Free all buffers from the pool */
-static void mvpp2_bm_bufs_free(struct mvpp2 *priv, struct mvpp2_bm_pool *bm_pool)
+static void mvpp2_bm_bufs_free(struct device *dev, struct mvpp2 *priv,
+ struct mvpp2_bm_pool *bm_pool)
{
int i;
for (i = 0; i < bm_pool->buf_num; i++) {
+ dma_addr_t buf_phys_addr;
u32 vaddr;
/* Get buffer virtual address (indirect access) */
- mvpp2_read(priv, MVPP2_BM_PHY_ALLOC_REG(bm_pool->id));
+ buf_phys_addr = mvpp2_read(priv,
+ MVPP2_BM_PHY_ALLOC_REG(bm_pool->id));
vaddr = mvpp2_read(priv, MVPP2_BM_VIRT_ALLOC_REG);
+
+ dma_unmap_single(dev, buf_phys_addr,
+ bm_pool->buf_size, DMA_FROM_DEVICE);
+
if (!vaddr)
break;
dev_kfree_skb_any((struct sk_buff *)vaddr);
@@ -3439,7 +3446,7 @@ static int mvpp2_bm_pool_destroy(struct platform_device *pdev,
{
u32 val;
- mvpp2_bm_bufs_free(priv, bm_pool);
+ mvpp2_bm_bufs_free(&pdev->dev, priv, bm_pool);
if (bm_pool->buf_num) {
WARN(1, "cannot free all buffers in pool %d\n", bm_pool->id);
return 0;
@@ -3692,7 +3699,8 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type,
MVPP2_BM_LONG_BUF_NUM :
MVPP2_BM_SHORT_BUF_NUM;
else
- mvpp2_bm_bufs_free(port->priv, new_pool);
+ mvpp2_bm_bufs_free(port->dev->dev.parent,
+ port->priv, new_pool);
new_pool->pkt_size = pkt_size;
@@ -3756,7 +3764,7 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
int pkt_size = MVPP2_RX_PKT_SIZE(mtu);
/* Update BM pool with new buffer size */
- mvpp2_bm_bufs_free(port->priv, port_pool);
+ mvpp2_bm_bufs_free(dev->dev.parent, port->priv, port_pool);
if (port_pool->buf_num) {
WARN(1, "cannot free all buffers in pool %d\n", port_pool->id);
return -EIO;
@@ -4401,11 +4409,10 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port,
mvpp2_txq_inc_get(txq_pcpu);
- if (!skb)
- continue;
-
dma_unmap_single(port->dev->dev.parent, buf_phys_addr,
skb_headlen(skb), DMA_TO_DEVICE);
+ if (!skb)
+ continue;
dev_kfree_skb_any(skb);
}
}
@@ -5092,7 +5099,8 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
struct mvpp2_rx_queue *rxq)
{
struct net_device *dev = port->dev;
- int rx_received, rx_filled, i;
+ int rx_received;
+ int rx_done = 0;
u32 rcvd_pkts = 0;
u32 rcvd_bytes = 0;
@@ -5101,17 +5109,18 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
if (rx_todo > rx_received)
rx_todo = rx_received;
- rx_filled = 0;
- for (i = 0; i < rx_todo; i++) {
+ while (rx_done < rx_todo) {
struct mvpp2_rx_desc *rx_desc = mvpp2_rxq_next_desc_get(rxq);
struct mvpp2_bm_pool *bm_pool;
struct sk_buff *skb;
+ dma_addr_t phys_addr;
u32 bm, rx_status;
int pool, rx_bytes, err;
- rx_filled++;
+ rx_done++;
rx_status = rx_desc->status;
rx_bytes = rx_desc->data_size - MVPP2_MH_SIZE;
+ phys_addr = rx_desc->buf_phys_addr;
bm = mvpp2_bm_cookie_build(rx_desc);
pool = mvpp2_bm_cookie_pool_get(bm);
@@ -5128,8 +5137,10 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
* comprised by the RX descriptor.
*/
if (rx_status & MVPP2_RXD_ERR_SUMMARY) {
+ err_drop_frame:
dev->stats.rx_errors++;
mvpp2_rx_error(port, rx_desc);
+ /* Return the buffer to the pool */
mvpp2_pool_refill(port, bm, rx_desc->buf_phys_addr,
rx_desc->buf_cookie);
continue;
@@ -5137,6 +5148,15 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
skb = (struct sk_buff *)rx_desc->buf_cookie;
+ err = mvpp2_rx_refill(port, bm_pool, bm, 0);
+ if (err) {
+ netdev_err(port->dev, "failed to refill BM pools\n");
+ goto err_drop_frame;
+ }
+
+ dma_unmap_single(dev->dev.parent, phys_addr,
+ bm_pool->buf_size, DMA_FROM_DEVICE);
+
rcvd_pkts++;
rcvd_bytes += rx_bytes;
atomic_inc(&bm_pool->in_use);
@@ -5147,12 +5167,6 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
mvpp2_rx_csum(port, rx_status, skb);
napi_gro_receive(&port->napi, skb);
-
- err = mvpp2_rx_refill(port, bm_pool, bm, 0);
- if (err) {
- netdev_err(port->dev, "failed to refill BM pools\n");
- rx_filled--;
- }
}
if (rcvd_pkts) {
@@ -5166,7 +5180,7 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
/* Update Rx queue management counters */
wmb();
- mvpp2_rxq_status_update(port, rxq->id, rx_todo, rx_filled);
+ mvpp2_rxq_status_update(port, rxq->id, rx_done, rx_done);
return rx_todo;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 2177e56ed0be..d48d5793407d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1010,7 +1010,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
smp->method == IB_MGMT_METHOD_GET) || network_view) {
mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n",
- slave, smp->method, smp->mgmt_class,
+ slave, smp->mgmt_class, smp->method,
network_view ? "Network" : "Host",
be16_to_cpu(smp->attr_id));
return -EPERM;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index 8a083d73efdb..038f9ce391e6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -242,6 +242,13 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
unsigned long flags;
u64 ns, zero = 0;
+ /* mlx4_en_init_timestamp is called for each netdev.
+ * mdev->ptp_clock is common to all ports; skip initialization if it
+ * was already done for another port.
+ */
+ if (mdev->ptp_clock)
+ return;
+
rwlock_init(&mdev->clock_lock);
memset(&mdev->cycles, 0, sizeof(mdev->cycles));
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index 005f910ec955..e0ec280a7fa1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -232,9 +232,6 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr)
if (mdev->pndev[i])
mlx4_en_destroy_netdev(mdev->pndev[i]);
- if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)
- mlx4_en_remove_timestamp(mdev);
-
flush_workqueue(mdev->workqueue);
destroy_workqueue(mdev->workqueue);
(void) mlx4_mr_free(dev, &mdev->mr);
@@ -320,10 +317,6 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
mdev->port_cnt++;
- /* Initialize time stamp mechanism */
- if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)
- mlx4_en_init_timestamp(mdev);
-
/* Set default number of RX rings*/
mlx4_en_set_num_rx_rings(mdev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 886e1bc86374..7869f97de5da 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2072,6 +2072,9 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
/* flush any pending task for this netdev */
flush_workqueue(mdev->workqueue);
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)
+ mlx4_en_remove_timestamp(mdev);
+
/* Detach the netdev so tasks would not attempt to access it */
mutex_lock(&mdev->state_lock);
mdev->pndev[priv->port] = NULL;
@@ -3058,9 +3061,12 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
}
queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
+ /* Initialize time stamp mechanism */
if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)
- queue_delayed_work(mdev->workqueue, &priv->service_task,
- SERVICE_TASK_DELAY);
+ mlx4_en_init_timestamp(mdev);
+
+ queue_delayed_work(mdev->workqueue, &priv->service_task,
+ SERVICE_TASK_DELAY);
mlx4_en_set_stats_bitmap(mdev->dev, &priv->stats_bitmap,
mdev->profile.prof[priv->port].rx_ppp,
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 85f1b1e7e505..31c491e02e69 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -892,9 +892,10 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
dev->caps.port_mask[i] = dev->caps.port_type[i];
dev->caps.phys_port_id[i] = func_cap.phys_port_id;
- if (mlx4_get_slave_pkey_gid_tbl_len(dev, i,
- &dev->caps.gid_table_len[i],
- &dev->caps.pkey_table_len[i]))
+ err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
+ &dev->caps.gid_table_len[i],
+ &dev->caps.pkey_table_len[i]);
+ if (err)
goto err_mem;
}
@@ -906,6 +907,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
dev->caps.uar_page_size * dev->caps.num_uars,
(unsigned long long)
pci_resource_len(dev->persist->pdev, 2));
+ err = -ENOMEM;
goto err_mem;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 9813d34f3e5b..cad6c44df91c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -4306,9 +4306,10 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
return -EOPNOTSUPP;
ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf;
- ctrl->port = mlx4_slave_convert_port(dev, slave, ctrl->port);
- if (ctrl->port <= 0)
+ err = mlx4_slave_convert_port(dev, slave, ctrl->port);
+ if (err <= 0)
return -EINVAL;
+ ctrl->port = err;
qpn = be32_to_cpu(ctrl->qpn) & 0xffffff;
err = get_res(dev, slave, qpn, RES_QP, &rqp);
if (err) {
@@ -4952,26 +4953,41 @@ static void rem_slave_counters(struct mlx4_dev *dev, int slave)
struct res_counter *counter;
struct res_counter *tmp;
int err;
- int index;
+ int *counters_arr = NULL;
+ int i, j;
err = move_all_busy(dev, slave, RES_COUNTER);
if (err)
mlx4_warn(dev, "rem_slave_counters: Could not move all counters - too busy for slave %d\n",
slave);
- spin_lock_irq(mlx4_tlock(dev));
- list_for_each_entry_safe(counter, tmp, counter_list, com.list) {
- if (counter->com.owner == slave) {
- index = counter->com.res_id;
- rb_erase(&counter->com.node,
- &tracker->res_tree[RES_COUNTER]);
- list_del(&counter->com.list);
- kfree(counter);
- __mlx4_counter_free(dev, index);
+ counters_arr = kmalloc_array(dev->caps.max_counters,
+ sizeof(*counters_arr), GFP_KERNEL);
+ if (!counters_arr)
+ return;
+
+ do {
+ i = 0;
+ j = 0;
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(counter, tmp, counter_list, com.list) {
+ if (counter->com.owner == slave) {
+ counters_arr[i++] = counter->com.res_id;
+ rb_erase(&counter->com.node,
+ &tracker->res_tree[RES_COUNTER]);
+ list_del(&counter->com.list);
+ kfree(counter);
+ }
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+
+ while (j < i) {
+ __mlx4_counter_free(dev, counters_arr[j++]);
mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
}
- }
- spin_unlock_irq(mlx4_tlock(dev));
+ } while (i);
+
+ kfree(counters_arr);
}
static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave)
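
The rem_slave_counters rework above follows a common pattern: do the list surgery and collect resource IDs under the irq-disabled spinlock, then perform the potentially sleeping frees after dropping it. A simplified standalone sketch — the item type and free callback are illustrative:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct tracked { struct list_head node; int id; };

/* Assumes at most 'max' items are on the list; the real code re-scans in
 * a loop to catch entries added concurrently.
 */
static void drain_tracked(spinlock_t *lock, struct list_head *list,
			  int max, void (*free_id)(int))
{
	struct tracked *it, *tmp;
	int *ids, n = 0;

	ids = kmalloc_array(max, sizeof(*ids), GFP_KERNEL);
	if (!ids)
		return;

	spin_lock_irq(lock);
	list_for_each_entry_safe(it, tmp, list, node) {
		ids[n++] = it->id;	/* bookkeeping only under the lock */
		list_del(&it->node);
		kfree(it);		/* kfree() is safe under a spinlock */
	}
	spin_unlock_irq(lock);

	while (n)
		free_id(ids[--n]);	/* may sleep; lock already dropped */

	kfree(ids);
}
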
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index f2ae62dd8c09..22e72bf1ae48 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -334,9 +334,15 @@ struct mlx5e_tx_skb_cb {
#define MLX5E_TX_SKB_CB(__skb) ((struct mlx5e_tx_skb_cb *)__skb->cb)
+enum mlx5e_dma_map_type {
+ MLX5E_DMA_MAP_SINGLE,
+ MLX5E_DMA_MAP_PAGE
+};
+
struct mlx5e_sq_dma {
- dma_addr_t addr;
- u32 size;
+ dma_addr_t addr;
+ u32 size;
+ enum mlx5e_dma_map_type type;
};
enum {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5fc4d2d78cdf..1e52db32c73d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1332,6 +1332,42 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt)
return err;
}
+static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev,
+ u32 tirn)
+{
+ void *in;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
+
+ err = mlx5_core_modify_tir(mdev, tirn, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < MLX5E_NUM_TT; i++) {
+ err = mlx5e_refresh_tir_self_loopback_enable(priv->mdev,
+ priv->tirn[i]);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int mlx5e_set_dev_port_mtu(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -1376,6 +1412,13 @@ int mlx5e_open_locked(struct net_device *netdev)
goto err_clear_state_opened_flag;
}
+ err = mlx5e_refresh_tirs_self_loopback_enable(priv);
+ if (err) {
+ netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n",
+ __func__, err);
+ goto err_close_channels;
+ }
+
mlx5e_update_carrier(priv);
mlx5e_redirect_rqts(priv);
@@ -1383,6 +1426,8 @@ int mlx5e_open_locked(struct net_device *netdev)
return 0;
+err_close_channels:
+ mlx5e_close_channels(priv);
err_clear_state_opened_flag:
clear_bit(MLX5E_STATE_OPENED, &priv->state);
return err;
@@ -1856,6 +1901,8 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+ max_mtu = MLX5E_HW2SW_MTU(max_mtu);
+
if (new_mtu > max_mtu) {
netdev_err(netdev,
"%s: Bad MTU (%d) > (%d) Max\n",
@@ -1909,6 +1956,9 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
"Not creating net device, some required device capabilities are missing\n");
return -ENOTSUPP;
}
+ if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable))
+ mlx5_core_warn(mdev, "Self loop back prevention is not supported\n");
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index cd8f85a251d7..1341b1d3c421 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -61,39 +61,47 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw)
}
}
-static void mlx5e_dma_pop_last_pushed(struct mlx5e_sq *sq, dma_addr_t *addr,
- u32 *size)
+static inline void mlx5e_tx_dma_unmap(struct device *pdev,
+ struct mlx5e_sq_dma *dma)
{
- sq->dma_fifo_pc--;
- *addr = sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr;
- *size = sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size;
-}
-
-static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, struct sk_buff *skb)
-{
- dma_addr_t addr;
- u32 size;
- int i;
-
- for (i = 0; i < MLX5E_TX_SKB_CB(skb)->num_dma; i++) {
- mlx5e_dma_pop_last_pushed(sq, &addr, &size);
- dma_unmap_single(sq->pdev, addr, size, DMA_TO_DEVICE);
+ switch (dma->type) {
+ case MLX5E_DMA_MAP_SINGLE:
+ dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ case MLX5E_DMA_MAP_PAGE:
+ dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ default:
+ WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
}
}
-static inline void mlx5e_dma_push(struct mlx5e_sq *sq, dma_addr_t addr,
- u32 size)
+static inline void mlx5e_dma_push(struct mlx5e_sq *sq,
+ dma_addr_t addr,
+ u32 size,
+ enum mlx5e_dma_map_type map_type)
{
sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr;
sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size;
+ sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].type = map_type;
sq->dma_fifo_pc++;
}
-static inline void mlx5e_dma_get(struct mlx5e_sq *sq, u32 i, dma_addr_t *addr,
- u32 *size)
+static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i)
{
- *addr = sq->dma_fifo[i & sq->dma_fifo_mask].addr;
- *size = sq->dma_fifo[i & sq->dma_fifo_mask].size;
+ return &sq->dma_fifo[i & sq->dma_fifo_mask];
+}
+
+static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, struct sk_buff *skb)
+{
+ int i;
+
+ for (i = 0; i < MLX5E_TX_SKB_CB(skb)->num_dma; i++) {
+ struct mlx5e_sq_dma *last_pushed_dma =
+ mlx5e_dma_get(sq, --sq->dma_fifo_pc);
+
+ mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma);
+ }
}
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
@@ -118,8 +126,15 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
*/
#define MLX5E_MIN_INLINE ETH_HLEN
- if (bf && (skb_headlen(skb) <= sq->max_inline))
- return skb_headlen(skb);
+ if (bf) {
+ u16 ihs = skb_headlen(skb);
+
+ if (skb_vlan_tag_present(skb))
+ ihs += VLAN_HLEN;
+
+ if (ihs <= sq->max_inline)
+ return skb_headlen(skb);
+ }
return MLX5E_MIN_INLINE;
}
@@ -218,7 +233,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
dseg->lkey = sq->mkey_be;
dseg->byte_count = cpu_to_be32(headlen);
- mlx5e_dma_push(sq, dma_addr, headlen);
+ mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE);
MLX5E_TX_SKB_CB(skb)->num_dma++;
dseg++;
@@ -237,7 +252,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
dseg->lkey = sq->mkey_be;
dseg->byte_count = cpu_to_be32(fsz);
- mlx5e_dma_push(sq, dma_addr, fsz);
+ mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
MLX5E_TX_SKB_CB(skb)->num_dma++;
dseg++;
@@ -353,13 +368,10 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq)
}
for (j = 0; j < MLX5E_TX_SKB_CB(skb)->num_dma; j++) {
- dma_addr_t addr;
- u32 size;
+ struct mlx5e_sq_dma *dma =
+ mlx5e_dma_get(sq, dma_fifo_cc++);
- mlx5e_dma_get(sq, dma_fifo_cc, &addr, &size);
- dma_fifo_cc++;
- dma_unmap_single(sq->pdev, addr, size,
- DMA_TO_DEVICE);
+ mlx5e_tx_dma_unmap(sq->pdev, dma);
}
npkts++;
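
The en.h/en_tx.c hunks above tag each TX DMA fifo entry with how it was mapped so that completion and error paths can call the matching unmap. A condensed standalone version of that helper, with names simplified from the driver's:

#include <linux/dma-mapping.h>

enum sq_map_type { SQ_MAP_SINGLE, SQ_MAP_PAGE };

struct sq_dma {
	dma_addr_t addr;
	u32 size;
	enum sq_map_type type;	/* recorded at map time, consulted at unmap */
};

static void sq_dma_unmap(struct device *dev, struct sq_dma *dma)
{
	switch (dma->type) {
	case SQ_MAP_SINGLE:	/* headers mapped with dma_map_single() */
		dma_unmap_single(dev, dma->addr, dma->size, DMA_TO_DEVICE);
		break;
	case SQ_MAP_PAGE:	/* frags mapped with skb_frag_dma_map() */
		dma_unmap_page(dev, dma->addr, dma->size, DMA_TO_DEVICE);
		break;
	}
}
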
diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c
index b83f7c0fcf99..122c2ee3dfe2 100644
--- a/drivers/net/ethernet/natsemi/natsemi.c
+++ b/drivers/net/ethernet/natsemi/natsemi.c
@@ -1937,6 +1937,12 @@ static void refill_rx(struct net_device *dev)
break; /* Better luck next round. */
np->rx_dma[entry] = pci_map_single(np->pci_dev,
skb->data, buflen, PCI_DMA_FROMDEVICE);
+ if (pci_dma_mapping_error(np->pci_dev,
+ np->rx_dma[entry])) {
+ dev_kfree_skb_any(skb);
+ np->rx_skbuff[entry] = NULL;
+ break; /* Better luck next round. */
+ }
np->rx_ring[entry].addr = cpu_to_le32(np->rx_dma[entry]);
}
np->rx_ring[entry].cmd_status = cpu_to_le32(np->rx_buf_sz);
@@ -2093,6 +2099,12 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev)
np->tx_skbuff[entry] = skb;
np->tx_dma[entry] = pci_map_single(np->pci_dev,
skb->data,skb->len, PCI_DMA_TODEVICE);
+ if (pci_dma_mapping_error(np->pci_dev, np->tx_dma[entry])) {
+ np->tx_skbuff[entry] = NULL;
+ dev_kfree_skb_irq(skb);
+ dev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+ }
np->tx_ring[entry].addr = cpu_to_le32(np->tx_dma[entry]);
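
The natsemi hunks above add the mapping-error checks the DMA API requires: a pci_map_single() result must be validated before it is written into a descriptor. A sketch of the TX-side check — the helper and stats plumbing are hypothetical:

#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/skbuff.h>

static int tx_map_or_drop(struct pci_dev *pdev, struct sk_buff *skb,
			  dma_addr_t *dma, struct net_device_stats *stats)
{
	*dma = pci_map_single(pdev, skb->data, skb->len, PCI_DMA_TODEVICE);
	if (pci_dma_mapping_error(pdev, *dma)) {
		dev_kfree_skb_any(skb);	/* drop rather than DMA to a bad address */
		stats->tx_dropped++;
		return -ENOMEM;
	}
	return 0;	/* safe to write *dma into the descriptor */
}
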
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index b159ef8303cc..057665180f13 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -1326,7 +1326,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
/* Get platform resources */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
irq = platform_get_irq(pdev, 0);
- if ((!res) || (irq < 0) || (irq >= NR_IRQS)) {
+ if (!res || irq < 0) {
dev_err(&pdev->dev, "error getting resources.\n");
ret = -ENXIO;
goto err_exit;
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index ac17d8669b1a..1292c360390c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -299,6 +299,7 @@ struct qed_hwfn {
/* Flag indicating whether interrupts are enabled or not*/
bool b_int_enabled;
+ bool b_int_requested;
struct qed_mcp_info *mcp_info;
@@ -491,6 +492,8 @@ u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
u32 input_len, u8 *input_buf,
u32 max_size, u8 *unzip_buf);
+int qed_slowpath_irq_req(struct qed_hwfn *hwfn);
+
#define QED_ETH_INTERFACE_VERSION 300
#endif /* _QED_H */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 803b190ccada..817bbd5476ff 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1385,52 +1385,63 @@ err0:
return rc;
}
-static u32 qed_hw_bar_size(struct qed_dev *cdev,
- u8 bar_id)
+static u32 qed_hw_bar_size(struct qed_hwfn *p_hwfn,
+ u8 bar_id)
{
- u32 size = pci_resource_len(cdev->pdev, (bar_id > 0) ? 2 : 0);
+ u32 bar_reg = (bar_id == 0 ? PGLUE_B_REG_PF_BAR0_SIZE
+ : PGLUE_B_REG_PF_BAR1_SIZE);
+ u32 val = qed_rd(p_hwfn, p_hwfn->p_main_ptt, bar_reg);
- return size / cdev->num_hwfns;
+ /* Get the BAR size (in KB) from the hardware-given val */
+ return 1 << (val + 15);
}
int qed_hw_prepare(struct qed_dev *cdev,
int personality)
{
- int rc, i;
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ int rc;
/* Store the precompiled init data ptrs */
qed_init_iro_array(cdev);
/* Initialize the first hwfn - will learn number of hwfns */
- rc = qed_hw_prepare_single(&cdev->hwfns[0], cdev->regview,
+ rc = qed_hw_prepare_single(p_hwfn,
+ cdev->regview,
cdev->doorbells, personality);
if (rc)
return rc;
- personality = cdev->hwfns[0].hw_info.personality;
+ personality = p_hwfn->hw_info.personality;
/* Initialize the rest of the hwfns */
- for (i = 1; i < cdev->num_hwfns; i++) {
+ if (cdev->num_hwfns > 1) {
void __iomem *p_regview, *p_doorbell;
+ u8 __iomem *addr;
+
+ /* adjust bar offset for second engine */
+ addr = cdev->regview + qed_hw_bar_size(p_hwfn, 0) / 2;
+ p_regview = addr;
- p_regview = cdev->regview +
- i * qed_hw_bar_size(cdev, 0);
- p_doorbell = cdev->doorbells +
- i * qed_hw_bar_size(cdev, 1);
- rc = qed_hw_prepare_single(&cdev->hwfns[i], p_regview,
+ /* adjust doorbell bar offset for second engine */
+ addr = cdev->doorbells + qed_hw_bar_size(p_hwfn, 1) / 2;
+ p_doorbell = addr;
+
+ /* prepare second hw function */
+ rc = qed_hw_prepare_single(&cdev->hwfns[1], p_regview,
p_doorbell, personality);
+
+ /* in case of error, need to free the previously
+ * initialized hwfn 0.
+ */
if (rc) {
- /* Cleanup previously initialized hwfns */
- while (--i >= 0) {
- qed_init_free(&cdev->hwfns[i]);
- qed_mcp_free(&cdev->hwfns[i]);
- qed_hw_hwfn_free(&cdev->hwfns[i]);
- }
- return rc;
+ qed_init_free(p_hwfn);
+ qed_mcp_free(p_hwfn);
+ qed_hw_hwfn_free(p_hwfn);
}
}
- return 0;
+ return rc;
}
void qed_hw_remove(struct qed_dev *cdev)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index de50e84902af..9cc9d62c1fec 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -783,22 +783,16 @@ void qed_int_igu_enable_int(struct qed_hwfn *p_hwfn,
qed_wr(p_hwfn, p_ptt, IGU_REG_PF_CONFIGURATION, igu_pf_conf);
}
-void qed_int_igu_enable(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt,
- enum qed_int_mode int_mode)
+int qed_int_igu_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+ enum qed_int_mode int_mode)
{
- int i;
-
- p_hwfn->b_int_enabled = 1;
+ int rc, i;
/* Mask non-link attentions */
for (i = 0; i < 9; i++)
qed_wr(p_hwfn, p_ptt,
MISC_REG_AEU_ENABLE1_IGU_OUT_0 + (i << 2), 0);
- /* Enable interrupt Generation */
- qed_int_igu_enable_int(p_hwfn, p_ptt, int_mode);
-
/* Configure AEU signal change to produce attentions for link */
qed_wr(p_hwfn, p_ptt, IGU_REG_LEADING_EDGE_LATCH, 0xfff);
qed_wr(p_hwfn, p_ptt, IGU_REG_TRAILING_EDGE_LATCH, 0xfff);
@@ -808,6 +802,19 @@ void qed_int_igu_enable(struct qed_hwfn *p_hwfn,
/* Unmask AEU signals toward IGU */
qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_MASK_ATTN_IGU, 0xff);
+ if ((int_mode != QED_INT_MODE_INTA) || IS_LEAD_HWFN(p_hwfn)) {
+ rc = qed_slowpath_irq_req(p_hwfn);
+ if (rc != 0) {
+ DP_NOTICE(p_hwfn, "Slowpath IRQ request failed\n");
+ return -EINVAL;
+ }
+ p_hwfn->b_int_requested = true;
+ }
+ /* Enable interrupt Generation */
+ qed_int_igu_enable_int(p_hwfn, p_ptt, int_mode);
+ p_hwfn->b_int_enabled = 1;
+
+ return rc;
}
void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn,
@@ -1127,3 +1134,11 @@ int qed_int_get_num_sbs(struct qed_hwfn *p_hwfn,
return info->igu_sb_cnt;
}
+
+void qed_int_disable_post_isr_release(struct qed_dev *cdev)
+{
+ int i;
+
+ for_each_hwfn(cdev, i)
+ cdev->hwfns[i].b_int_requested = false;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h
index 16b57518e706..51e0b09a7f47 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.h
@@ -169,10 +169,14 @@ int qed_int_get_num_sbs(struct qed_hwfn *p_hwfn,
int *p_iov_blks);
/**
- * @file
+ * @brief qed_int_disable_post_isr_release - performs the cleanup post ISR
+ * release. The API needs to be called after releasing all slowpath IRQs
+ * of the device.
+ *
+ * @param cdev
*
- * @brief Interrupt handler
*/
+void qed_int_disable_post_isr_release(struct qed_dev *cdev);
#define QED_CAU_DEF_RX_TIMER_RES 0
#define QED_CAU_DEF_TX_TIMER_RES 0
@@ -366,10 +370,11 @@ void qed_int_setup(struct qed_hwfn *p_hwfn,
* @param p_hwfn
* @param p_ptt
* @param int_mode
+ *
+ * @return int
*/
-void qed_int_igu_enable(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt,
- enum qed_int_mode int_mode);
+int qed_int_igu_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+ enum qed_int_mode int_mode);
/**
* @brief - Initialize CAU status block entry
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 947c7af72b25..174f7341c5c3 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -476,41 +476,22 @@ static irqreturn_t qed_single_int(int irq, void *dev_instance)
return rc;
}
-static int qed_slowpath_irq_req(struct qed_dev *cdev)
+int qed_slowpath_irq_req(struct qed_hwfn *hwfn)
{
- int i = 0, rc = 0;
+ struct qed_dev *cdev = hwfn->cdev;
+ int rc = 0;
+ u8 id;
if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) {
- /* Request all the slowpath MSI-X vectors */
- for (i = 0; i < cdev->num_hwfns; i++) {
- snprintf(cdev->hwfns[i].name, NAME_SIZE,
- "sp-%d-%02x:%02x.%02x",
- i, cdev->pdev->bus->number,
- PCI_SLOT(cdev->pdev->devfn),
- cdev->hwfns[i].abs_pf_id);
-
- rc = request_irq(cdev->int_params.msix_table[i].vector,
- qed_msix_sp_int, 0,
- cdev->hwfns[i].name,
- cdev->hwfns[i].sp_dpc);
- if (rc)
- break;
-
- DP_VERBOSE(&cdev->hwfns[i],
- (NETIF_MSG_INTR | QED_MSG_SP),
+ id = hwfn->my_id;
+ snprintf(hwfn->name, NAME_SIZE, "sp-%d-%02x:%02x.%02x",
+ id, cdev->pdev->bus->number,
+ PCI_SLOT(cdev->pdev->devfn), hwfn->abs_pf_id);
+ rc = request_irq(cdev->int_params.msix_table[id].vector,
+ qed_msix_sp_int, 0, hwfn->name, hwfn->sp_dpc);
+ if (!rc)
+ DP_VERBOSE(hwfn, (NETIF_MSG_INTR | QED_MSG_SP),
"Requested slowpath MSI-X\n");
- }
-
- if (i != cdev->num_hwfns) {
- /* Free already request MSI-X vectors */
- for (i--; i >= 0; i--) {
- unsigned int vec =
- cdev->int_params.msix_table[i].vector;
- synchronize_irq(vec);
- free_irq(cdev->int_params.msix_table[i].vector,
- cdev->hwfns[i].sp_dpc);
- }
- }
} else {
unsigned long flags = 0;
@@ -534,13 +515,17 @@ static void qed_slowpath_irq_free(struct qed_dev *cdev)
if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) {
for_each_hwfn(cdev, i) {
+ if (!cdev->hwfns[i].b_int_requested)
+ break;
synchronize_irq(cdev->int_params.msix_table[i].vector);
free_irq(cdev->int_params.msix_table[i].vector,
cdev->hwfns[i].sp_dpc);
}
} else {
- free_irq(cdev->pdev->irq, cdev);
+ if (QED_LEADING_HWFN(cdev)->b_int_requested)
+ free_irq(cdev->pdev->irq, cdev);
}
+ qed_int_disable_post_isr_release(cdev);
}
static int qed_nic_stop(struct qed_dev *cdev)
@@ -765,16 +750,11 @@ static int qed_slowpath_start(struct qed_dev *cdev,
if (rc)
goto err1;
- /* Request the slowpath IRQ */
- rc = qed_slowpath_irq_req(cdev);
- if (rc)
- goto err2;
-
/* Allocate stream for unzipping */
rc = qed_alloc_stream_mem(cdev);
if (rc) {
DP_NOTICE(cdev, "Failed to allocate stream memory\n");
- goto err3;
+ goto err2;
}
/* Start the slowpath */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index 7a5ce5914ace..e8df12335a97 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -363,4 +363,8 @@
0x7 << 0)
#define MCP_REG_NVM_CFG4_FLASH_SIZE_SHIFT \
0
+#define PGLUE_B_REG_PF_BAR0_SIZE \
+ 0x2aae60UL
+#define PGLUE_B_REG_PF_BAR1_SIZE \
+ 0x2aae64UL
#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index 31a1f1eb4f56..287fadfab52d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -124,8 +124,12 @@ struct qed_spq {
dma_addr_t p_phys;
struct qed_spq_entry *p_virt;
- /* Used as index for completions (returns on EQ by FW) */
- u16 echo_idx;
+#define SPQ_RING_SIZE \
+ (CORE_SPQE_PAGE_SIZE_BYTES / sizeof(struct slow_path_element))
+
+ /* Bitmap for handling out-of-order completions */
+ DECLARE_BITMAP(p_comp_bitmap, SPQ_RING_SIZE);
+ u8 comp_bitmap_idx;
/* Statistics */
u32 unlimited_pending_count;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index 7c0b8459666e..3dd548ab8df1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -112,8 +112,6 @@ static int
qed_spq_fill_entry(struct qed_hwfn *p_hwfn,
struct qed_spq_entry *p_ent)
{
- p_ent->elem.hdr.echo = 0;
- p_hwfn->p_spq->echo_idx++;
p_ent->flags = 0;
switch (p_ent->comp_mode) {
@@ -195,10 +193,12 @@ static int qed_spq_hw_post(struct qed_hwfn *p_hwfn,
struct qed_spq *p_spq,
struct qed_spq_entry *p_ent)
{
- struct qed_chain *p_chain = &p_hwfn->p_spq->chain;
+ struct qed_chain *p_chain = &p_hwfn->p_spq->chain;
+ u16 echo = qed_chain_get_prod_idx(p_chain);
struct slow_path_element *elem;
struct core_db_data db;
+ p_ent->elem.hdr.echo = cpu_to_le16(echo);
elem = qed_chain_produce(p_chain);
if (!elem) {
DP_NOTICE(p_hwfn, "Failed to produce from SPQ chain\n");
@@ -437,7 +437,9 @@ void qed_spq_setup(struct qed_hwfn *p_hwfn)
p_spq->comp_count = 0;
p_spq->comp_sent_count = 0;
p_spq->unlimited_pending_count = 0;
- p_spq->echo_idx = 0;
+
+ bitmap_zero(p_spq->p_comp_bitmap, SPQ_RING_SIZE);
+ p_spq->comp_bitmap_idx = 0;
/* SPQ cid, cannot fail */
qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_CORE, &p_spq->cid);
@@ -582,26 +584,32 @@ qed_spq_add_entry(struct qed_hwfn *p_hwfn,
struct qed_spq *p_spq = p_hwfn->p_spq;
if (p_ent->queue == &p_spq->unlimited_pending) {
- struct qed_spq_entry *p_en2;
if (list_empty(&p_spq->free_pool)) {
list_add_tail(&p_ent->list, &p_spq->unlimited_pending);
p_spq->unlimited_pending_count++;
return 0;
- }
+ } else {
+ struct qed_spq_entry *p_en2;
- p_en2 = list_first_entry(&p_spq->free_pool,
- struct qed_spq_entry,
- list);
- list_del(&p_en2->list);
+ p_en2 = list_first_entry(&p_spq->free_pool,
+ struct qed_spq_entry,
+ list);
+ list_del(&p_en2->list);
+
+ /* Copy the ring element physical pointer to the new
+ * entry, since we are about to override the entire ring
+ * entry and don't want to lose the pointer.
+ */
+ p_ent->elem.data_ptr = p_en2->elem.data_ptr;
- /* Strcut assignment */
- *p_en2 = *p_ent;
+ *p_en2 = *p_ent;
- kfree(p_ent);
+ kfree(p_ent);
- p_ent = p_en2;
+ p_ent = p_en2;
+ }
}
/* entry is to be placed in 'pending' queue */
@@ -777,13 +785,38 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending,
list) {
if (p_ent->elem.hdr.echo == echo) {
+ u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE;
+
list_del(&p_ent->list);
- qed_chain_return_produced(&p_spq->chain);
+ /* Avoid overwriting SPQ entries when getting
+ * out-of-order completions, by marking the completions
+ * in a bitmap and increasing the chain consumer only
+ * for the first run of successively completed entries.
+ */
+ bitmap_set(p_spq->p_comp_bitmap, pos, SPQ_RING_SIZE);
+
+ while (test_bit(p_spq->comp_bitmap_idx,
+ p_spq->p_comp_bitmap)) {
+ bitmap_clear(p_spq->p_comp_bitmap,
+ p_spq->comp_bitmap_idx,
+ SPQ_RING_SIZE);
+ p_spq->comp_bitmap_idx++;
+ qed_chain_return_produced(&p_spq->chain);
+ }
+
p_spq->comp_count++;
found = p_ent;
break;
}
+
+ /* This is relatively uncommon - depends on scenarios
+ * which have multiple per-PF sent ramrods.
+ */
+ DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
+ "Got completion for echo %04x - doesn't match echo %04x in completion pending list\n",
+ le16_to_cpu(echo),
+ le16_to_cpu(p_ent->elem.hdr.echo));
}
/* Release lock before callback, as callback may post
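
The qed_spq.c hunks above replace the single echo index with a completion bitmap so out-of-order EQ completions don't overwrite live ring entries: each completed slot is marked, and the consumer advances only across the contiguous completed prefix. A standalone sketch of that bookkeeping — the ring size and names are illustrative:

#include <linux/bitmap.h>
#include <linux/bitops.h>

#define RING_SIZE 128	/* illustrative; the driver derives it from page size */

static DECLARE_BITMAP(comp_bitmap, RING_SIZE);
static unsigned int comp_idx;	/* next slot expected to complete in order */

/* Returns how many ring elements may be returned to the producer. */
static unsigned int complete_slot(unsigned int echo)
{
	unsigned int freed = 0;

	set_bit(echo % RING_SIZE, comp_bitmap);

	/* advance only across the contiguous run of completed slots */
	while (test_bit(comp_idx, comp_bitmap)) {
		clear_bit(comp_idx, comp_bitmap);
		comp_idx = (comp_idx + 1) % RING_SIZE;
		freed++;
	}
	return freed;
}
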
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c
index be7d7a62cc0d..34906750b7e7 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c
@@ -246,12 +246,13 @@ int qlcnic_83xx_check_vnic_state(struct qlcnic_adapter *adapter)
u32 state;
state = QLCRDX(ahw, QLC_83XX_VNIC_STATE);
- while (state != QLCNIC_DEV_NPAR_OPER && idc->vnic_wait_limit--) {
+ while (state != QLCNIC_DEV_NPAR_OPER && idc->vnic_wait_limit) {
+ idc->vnic_wait_limit--;
msleep(1000);
state = QLCRDX(ahw, QLC_83XX_VNIC_STATE);
}
- if (!idc->vnic_wait_limit) {
+ if (state != QLCNIC_DEV_NPAR_OPER) {
dev_err(&adapter->pdev->dev,
"vNIC mode not operational, state check timed out.\n");
return -EIO;
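
The hunk above — like the qlge and txc43128 changes later in this series — fixes the classic bounded-poll off-by-one: with `limit--` in the loop condition, the counter can end at 0 on success and -1 on timeout, so the exit test must look at the actual state instead. A sketch, with the state accessor left abstract:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>

static int wait_for_state(u32 (*read_state)(void), u32 goal, int limit)
{
	u32 state = read_state();

	while (state != goal && limit) {
		limit--;		/* decrement inside, not in the test */
		msleep(1000);
		state = read_state();
	}

	/* decide on the observed state, never on the leftover counter */
	return state == goal ? 0 : -EIO;
}
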
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index a5f422f26cb4..daf05155b732 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
@@ -772,8 +772,10 @@ int qlcnic_82xx_config_intrpt(struct qlcnic_adapter *adapter, u8 op_type)
int i, err = 0;
for (i = 0; i < ahw->num_msix; i++) {
- qlcnic_alloc_mbx_args(&cmd, adapter,
- QLCNIC_CMD_MQ_TX_CONFIG_INTR);
+ err = qlcnic_alloc_mbx_args(&cmd, adapter,
+ QLCNIC_CMD_MQ_TX_CONFIG_INTR);
+ if (err)
+ return err;
type = op_type ? QLCNIC_INTRPT_ADD : QLCNIC_INTRPT_DEL;
val = type | (ahw->intr_tbl[i].type << 4);
if (ahw->intr_tbl[i].type == QLCNIC_INTRPT_MSIX)
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 02b7115b6aaa..997976426799 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -4211,8 +4211,9 @@ static int ql_change_rx_buffers(struct ql_adapter *qdev)
/* Wait for an outstanding reset to complete. */
if (!test_bit(QL_ADAPTER_UP, &qdev->flags)) {
- int i = 3;
- while (i-- && !test_bit(QL_ADAPTER_UP, &qdev->flags)) {
+ int i = 4;
+
+ while (--i && !test_bit(QL_ADAPTER_UP, &qdev->flags)) {
netif_err(qdev, ifup, qdev->ndev,
"Waiting for adapter UP...\n");
ssleep(1);
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index ddb2c6c6ec94..689a4a5c8dcf 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -736,9 +736,8 @@ qcaspi_netdev_tx_timeout(struct net_device *dev)
netdev_info(qca->net_dev, "Transmit timeout at %ld, latency %ld\n",
jiffies, jiffies - dev->trans_start);
qca->net_dev->stats.tx_errors++;
- /* wake the queue if there is room */
- if (qcaspi_tx_ring_has_space(&qca->txr))
- netif_wake_queue(dev);
+ /* Trigger tx queue flush and QCA7000 reset */
+ qca->sync = QCASPI_SYNC_UNKNOWN;
}
static int
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index b4f21232019a..79ef799f88ab 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -7429,15 +7429,15 @@ process_pkt:
rtl8169_rx_vlan_tag(desc, skb);
+ if (skb->pkt_type == PACKET_MULTICAST)
+ dev->stats.multicast++;
+
napi_gro_receive(&tp->napi, skb);
u64_stats_update_begin(&tp->rx_stats.syncp);
tp->rx_stats.packets++;
tp->rx_stats.bytes += pkt_size;
u64_stats_update_end(&tp->rx_stats.syncp);
-
- if (skb->pkt_type == PACKET_MULTICAST)
- dev->stats.multicast++;
}
release_descriptor:
desc->opts2 = 0;
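
The r8169 hunk above moves the multicast accounting before napi_gro_receive(): GRO may consume and free the skb, so skb->pkt_type must be read while the driver still owns it. Sketched as a small delivery helper with hypothetical names:

#include <linux/if_packet.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static void rx_deliver(struct napi_struct *napi, struct net_device *dev,
		       struct sk_buff *skb)
{
	if (skb->pkt_type == PACKET_MULTICAST)	/* read before handing off */
		dev->stats.multicast++;

	napi_gro_receive(napi, skb);	/* may free skb; don't touch it after */
}
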
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index aa7b2083cb53..467d41698fd5 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -408,8 +408,6 @@ static int ravb_dmac_init(struct net_device *ndev)
/* Interrupt enable: */
/* Frame receive */
ravb_write(ndev, RIC0_FRE0 | RIC0_FRE1, RIC0);
- /* Receive FIFO full warning */
- ravb_write(ndev, RIC1_RFWE, RIC1);
/* Receive FIFO full error, descriptor empty */
ravb_write(ndev, RIC2_QFE0 | RIC2_QFE1 | RIC2_RFFE, RIC2);
/* Frame transmitted, timestamp FIFO updated */
@@ -733,8 +731,10 @@ static irqreturn_t ravb_interrupt(int irq, void *dev_id)
((tis & tic) & BIT(q))) {
if (napi_schedule_prep(&priv->napi[q])) {
/* Mask RX and TX interrupts */
- ravb_write(ndev, ric0 & ~BIT(q), RIC0);
- ravb_write(ndev, tic & ~BIT(q), TIC);
+ ric0 &= ~BIT(q);
+ tic &= ~BIT(q);
+ ravb_write(ndev, ric0, RIC0);
+ ravb_write(ndev, tic, TIC);
__napi_schedule(&priv->napi[q]);
} else {
netdev_warn(ndev,
@@ -905,6 +905,9 @@ static int ravb_phy_init(struct net_device *ndev)
netdev_info(ndev, "limited PHY to 100Mbit/s\n");
}
+ /* 10BASE is not supported */
+ phydev->supported &= ~PHY_10BT_FEATURES;
+
netdev_info(ndev, "attached PHY %d (IRQ %d) to driver %s\n",
phydev->addr, phydev->irq, phydev->drv->name);
@@ -1037,7 +1040,7 @@ static const char ravb_gstrings_stats[][ETH_GSTRING_LEN] = {
"rx_queue_1_mcast_packets",
"rx_queue_1_errors",
"rx_queue_1_crc_errors",
- "rx_queue_1_frame_errors_",
+ "rx_queue_1_frame_errors",
"rx_queue_1_length_errors",
"rx_queue_1_missed_errors",
"rx_queue_1_over_errors",
@@ -1225,7 +1228,7 @@ static int ravb_open(struct net_device *ndev)
/* Device init */
error = ravb_dmac_init(ndev);
if (error)
- goto out_free_irq;
+ goto out_free_irq2;
ravb_emac_init(ndev);
/* Initialise PTP Clock driver */
@@ -1243,9 +1246,11 @@ static int ravb_open(struct net_device *ndev)
out_ptp_stop:
/* Stop PTP Clock driver */
ravb_ptp_stop(ndev);
+out_free_irq2:
+ if (priv->chip_id == RCAR_GEN3)
+ free_irq(priv->emac_irq, ndev);
out_free_irq:
free_irq(ndev->irq, ndev);
- free_irq(priv->emac_irq, ndev);
out_napi_off:
napi_disable(&priv->napi[RAVB_NC]);
napi_disable(&priv->napi[RAVB_BE]);
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index e7bab7909ed9..6a8fc0f341ff 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -52,6 +52,8 @@
NETIF_MSG_RX_ERR| \
NETIF_MSG_TX_ERR)
+#define SH_ETH_OFFSET_INVALID ((u16)~0)
+
#define SH_ETH_OFFSET_DEFAULTS \
[0 ... SH_ETH_MAX_REGISTER_OFFSET - 1] = SH_ETH_OFFSET_INVALID
@@ -404,6 +406,28 @@ static const u16 sh_eth_offset_fast_sh3_sh2[SH_ETH_MAX_REGISTER_OFFSET] = {
static void sh_eth_rcv_snd_disable(struct net_device *ndev);
static struct net_device_stats *sh_eth_get_stats(struct net_device *ndev);
+static void sh_eth_write(struct net_device *ndev, u32 data, int enum_index)
+{
+ struct sh_eth_private *mdp = netdev_priv(ndev);
+ u16 offset = mdp->reg_offset[enum_index];
+
+ if (WARN_ON(offset == SH_ETH_OFFSET_INVALID))
+ return;
+
+ iowrite32(data, mdp->addr + offset);
+}
+
+static u32 sh_eth_read(struct net_device *ndev, int enum_index)
+{
+ struct sh_eth_private *mdp = netdev_priv(ndev);
+ u16 offset = mdp->reg_offset[enum_index];
+
+ if (WARN_ON(offset == SH_ETH_OFFSET_INVALID))
+ return ~0U;
+
+ return ioread32(mdp->addr + offset);
+}
+
static bool sh_eth_is_gether(struct sh_eth_private *mdp)
{
return mdp->reg_offset == sh_eth_offset_gigabit;
@@ -1143,6 +1167,7 @@ static void sh_eth_ring_format(struct net_device *ndev)
int tx_ringsize = sizeof(*txdesc) * mdp->num_tx_ring;
int skbuff_size = mdp->rx_buf_sz + SH_ETH_RX_ALIGN + 32 - 1;
dma_addr_t dma_addr;
+ u32 buf_len;
mdp->cur_rx = 0;
mdp->cur_tx = 0;
@@ -1163,16 +1188,16 @@ static void sh_eth_ring_format(struct net_device *ndev)
/* RX descriptor */
rxdesc = &mdp->rx_ring[i];
/* The size of the buffer is a multiple of 32 bytes. */
- rxdesc->buffer_length = ALIGN(mdp->rx_buf_sz, 32);
- dma_addr = dma_map_single(&ndev->dev, skb->data,
- rxdesc->buffer_length,
+ buf_len = ALIGN(mdp->rx_buf_sz, 32);
+ rxdesc->len = cpu_to_edmac(mdp, buf_len << 16);
+ dma_addr = dma_map_single(&ndev->dev, skb->data, buf_len,
DMA_FROM_DEVICE);
if (dma_mapping_error(&ndev->dev, dma_addr)) {
kfree_skb(skb);
break;
}
mdp->rx_skbuff[i] = skb;
- rxdesc->addr = dma_addr;
+ rxdesc->addr = cpu_to_edmac(mdp, dma_addr);
rxdesc->status = cpu_to_edmac(mdp, RD_RACT | RD_RFP);
/* Rx descriptor address set */
@@ -1196,7 +1221,7 @@ static void sh_eth_ring_format(struct net_device *ndev)
mdp->tx_skbuff[i] = NULL;
txdesc = &mdp->tx_ring[i];
txdesc->status = cpu_to_edmac(mdp, TD_TFP);
- txdesc->buffer_length = 0;
+ txdesc->len = cpu_to_edmac(mdp, 0);
if (i == 0) {
/* Tx descriptor address set */
sh_eth_write(ndev, mdp->tx_desc_dma, TDLAR);
@@ -1403,8 +1428,10 @@ static int sh_eth_txfree(struct net_device *ndev)
entry, edmac_to_cpu(mdp, txdesc->status));
/* Free the original skb. */
if (mdp->tx_skbuff[entry]) {
- dma_unmap_single(&ndev->dev, txdesc->addr,
- txdesc->buffer_length, DMA_TO_DEVICE);
+ dma_unmap_single(&ndev->dev,
+ edmac_to_cpu(mdp, txdesc->addr),
+ edmac_to_cpu(mdp, txdesc->len) >> 16,
+ DMA_TO_DEVICE);
dev_kfree_skb_irq(mdp->tx_skbuff[entry]);
mdp->tx_skbuff[entry] = NULL;
free_num++;
@@ -1414,7 +1441,7 @@ static int sh_eth_txfree(struct net_device *ndev)
txdesc->status |= cpu_to_edmac(mdp, TD_TDLE);
ndev->stats.tx_packets++;
- ndev->stats.tx_bytes += txdesc->buffer_length;
+ ndev->stats.tx_bytes += edmac_to_cpu(mdp, txdesc->len) >> 16;
}
return free_num;
}
@@ -1433,6 +1460,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
u32 desc_status;
int skbuff_size = mdp->rx_buf_sz + SH_ETH_RX_ALIGN + 32 - 1;
dma_addr_t dma_addr;
+ u32 buf_len;
boguscnt = min(boguscnt, *quota);
limit = boguscnt;
@@ -1441,7 +1469,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
/* RACT bit must be checked before all the following reads */
dma_rmb();
desc_status = edmac_to_cpu(mdp, rxdesc->status);
- pkt_len = rxdesc->frame_length;
+ pkt_len = edmac_to_cpu(mdp, rxdesc->len) & RD_RFL;
if (--boguscnt < 0)
break;
@@ -1462,6 +1490,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
if (mdp->cd->shift_rd0)
desc_status >>= 16;
+ skb = mdp->rx_skbuff[entry];
if (desc_status & (RD_RFS1 | RD_RFS2 | RD_RFS3 | RD_RFS4 |
RD_RFS5 | RD_RFS6 | RD_RFS10)) {
ndev->stats.rx_errors++;
@@ -1477,16 +1506,16 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
ndev->stats.rx_missed_errors++;
if (desc_status & RD_RFS10)
ndev->stats.rx_over_errors++;
- } else {
+ } else if (skb) {
+ dma_addr = edmac_to_cpu(mdp, rxdesc->addr);
if (!mdp->cd->hw_swap)
sh_eth_soft_swap(
- phys_to_virt(ALIGN(rxdesc->addr, 4)),
+ phys_to_virt(ALIGN(dma_addr, 4)),
pkt_len + 2);
- skb = mdp->rx_skbuff[entry];
mdp->rx_skbuff[entry] = NULL;
if (mdp->cd->rpadir)
skb_reserve(skb, NET_IP_ALIGN);
- dma_unmap_single(&ndev->dev, rxdesc->addr,
+ dma_unmap_single(&ndev->dev, dma_addr,
ALIGN(mdp->rx_buf_sz, 32),
DMA_FROM_DEVICE);
skb_put(skb, pkt_len);
@@ -1506,7 +1535,8 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
entry = mdp->dirty_rx % mdp->num_rx_ring;
rxdesc = &mdp->rx_ring[entry];
/* The size of the buffer is 32 byte boundary. */
- rxdesc->buffer_length = ALIGN(mdp->rx_buf_sz, 32);
+ buf_len = ALIGN(mdp->rx_buf_sz, 32);
+ rxdesc->len = cpu_to_edmac(mdp, buf_len << 16);
if (mdp->rx_skbuff[entry] == NULL) {
skb = netdev_alloc_skb(ndev, skbuff_size);
@@ -1514,8 +1544,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
break; /* Better luck next round. */
sh_eth_set_receive_align(skb);
dma_addr = dma_map_single(&ndev->dev, skb->data,
- rxdesc->buffer_length,
- DMA_FROM_DEVICE);
+ buf_len, DMA_FROM_DEVICE);
if (dma_mapping_error(&ndev->dev, dma_addr)) {
kfree_skb(skb);
break;
@@ -1523,7 +1552,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
mdp->rx_skbuff[entry] = skb;
skb_checksum_none_assert(skb);
- rxdesc->addr = dma_addr;
+ rxdesc->addr = cpu_to_edmac(mdp, dma_addr);
}
dma_wmb(); /* RACT bit must be set after all the above writes */
if (entry >= mdp->num_rx_ring - 1)
@@ -2331,8 +2360,8 @@ static void sh_eth_tx_timeout(struct net_device *ndev)
/* Free all the skbuffs in the Rx queue. */
for (i = 0; i < mdp->num_rx_ring; i++) {
rxdesc = &mdp->rx_ring[i];
- rxdesc->status = 0;
- rxdesc->addr = 0xBADF00D0;
+ rxdesc->status = cpu_to_edmac(mdp, 0);
+ rxdesc->addr = cpu_to_edmac(mdp, 0xBADF00D0);
dev_kfree_skb(mdp->rx_skbuff[i]);
mdp->rx_skbuff[i] = NULL;
}
@@ -2350,6 +2379,7 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
struct sh_eth_private *mdp = netdev_priv(ndev);
struct sh_eth_txdesc *txdesc;
+ dma_addr_t dma_addr;
u32 entry;
unsigned long flags;
@@ -2372,15 +2402,15 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev)
txdesc = &mdp->tx_ring[entry];
/* soft swap. */
if (!mdp->cd->hw_swap)
- sh_eth_soft_swap(phys_to_virt(ALIGN(txdesc->addr, 4)),
- skb->len + 2);
- txdesc->addr = dma_map_single(&ndev->dev, skb->data, skb->len,
- DMA_TO_DEVICE);
- if (dma_mapping_error(&ndev->dev, txdesc->addr)) {
+ sh_eth_soft_swap(PTR_ALIGN(skb->data, 4), skb->len + 2);
+ dma_addr = dma_map_single(&ndev->dev, skb->data, skb->len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&ndev->dev, dma_addr)) {
kfree_skb(skb);
return NETDEV_TX_OK;
}
- txdesc->buffer_length = skb->len;
+ txdesc->addr = cpu_to_edmac(mdp, dma_addr);
+ txdesc->len = cpu_to_edmac(mdp, skb->len << 16);
dma_wmb(); /* TACT bit must be set after all the above writes */
if (entry >= mdp->num_tx_ring - 1)
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index 50382b1c9ddc..72fcfc924589 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -283,7 +283,7 @@ enum DMAC_IM_BIT {
DMAC_M_RINT1 = 0x00000001,
};
-/* Receive descriptor bit */
+/* Receive descriptor 0 bits */
enum RD_STS_BIT {
RD_RACT = 0x80000000, RD_RDLE = 0x40000000,
RD_RFP1 = 0x20000000, RD_RFP0 = 0x10000000,
@@ -298,6 +298,12 @@ enum RD_STS_BIT {
#define RDFEND RD_RFP0
#define RD_RFP (RD_RFP1|RD_RFP0)
+/* Receive descriptor 1 bits */
+enum RD_LEN_BIT {
+ RD_RFL = 0x0000ffff, /* receive frame length */
+ RD_RBL = 0xffff0000, /* receive buffer length */
+};
+
/* FCFTR */
enum FCFTR_BIT {
FCFTR_RFF2 = 0x00040000, FCFTR_RFF1 = 0x00020000,
@@ -307,7 +313,7 @@ enum FCFTR_BIT {
#define DEFAULT_FIFO_F_D_RFF (FCFTR_RFF2 | FCFTR_RFF1 | FCFTR_RFF0)
#define DEFAULT_FIFO_F_D_RFD (FCFTR_RFD2 | FCFTR_RFD1 | FCFTR_RFD0)
-/* Transmit descriptor bit */
+/* Transmit descriptor 0 bits */
enum TD_STS_BIT {
TD_TACT = 0x80000000, TD_TDLE = 0x40000000,
TD_TFP1 = 0x20000000, TD_TFP0 = 0x10000000,
@@ -317,6 +323,11 @@ enum TD_STS_BIT {
#define TDFEND TD_TFP0
#define TD_TFP (TD_TFP1|TD_TFP0)
+/* Transmit descriptor 1 bits */
+enum TD_LEN_BIT {
+ TD_TBL = 0xffff0000, /* transmit buffer length */
+};
+
/* RMCR */
enum RMCR_BIT {
RMCR_RNC = 0x00000001,
@@ -425,15 +436,9 @@ enum TSU_FWSLC_BIT {
*/
struct sh_eth_txdesc {
u32 status; /* TD0 */
-#if defined(__LITTLE_ENDIAN)
- u16 pad0; /* TD1 */
- u16 buffer_length; /* TD1 */
-#else
- u16 buffer_length; /* TD1 */
- u16 pad0; /* TD1 */
-#endif
+ u32 len; /* TD1 */
u32 addr; /* TD2 */
- u32 pad1; /* padding data */
+ u32 pad0; /* padding data */
} __aligned(2) __packed;
/* The sh ether Rx buffer descriptors.
@@ -441,13 +446,7 @@ struct sh_eth_txdesc {
*/
struct sh_eth_rxdesc {
u32 status; /* RD0 */
-#if defined(__LITTLE_ENDIAN)
- u16 frame_length; /* RD1 */
- u16 buffer_length; /* RD1 */
-#else
- u16 buffer_length; /* RD1 */
- u16 frame_length; /* RD1 */
-#endif
+ u32 len; /* RD1 */
u32 addr; /* RD2 */
u32 pad0; /* padding data */
} __aligned(2) __packed;
@@ -546,31 +545,6 @@ static inline void sh_eth_soft_swap(char *src, int len)
#endif
}
-#define SH_ETH_OFFSET_INVALID ((u16) ~0)
-
-static inline void sh_eth_write(struct net_device *ndev, u32 data,
- int enum_index)
-{
- struct sh_eth_private *mdp = netdev_priv(ndev);
- u16 offset = mdp->reg_offset[enum_index];
-
- if (WARN_ON(offset == SH_ETH_OFFSET_INVALID))
- return;
-
- iowrite32(data, mdp->addr + offset);
-}
-
-static inline u32 sh_eth_read(struct net_device *ndev, int enum_index)
-{
- struct sh_eth_private *mdp = netdev_priv(ndev);
- u16 offset = mdp->reg_offset[enum_index];
-
- if (WARN_ON(offset == SH_ETH_OFFSET_INVALID))
- return ~0U;
-
- return ioread32(mdp->addr + offset);
-}
-
static inline void *sh_eth_tsu_get_offset(struct sh_eth_private *mdp,
int enum_index)
{
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index bc6d21b471be..e6a084a6be12 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -3299,7 +3299,8 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
new_spec.priority = EFX_FILTER_PRI_AUTO;
new_spec.flags = (EFX_FILTER_FLAG_RX |
- EFX_FILTER_FLAG_RX_RSS);
+ (efx_rss_enabled(efx) ?
+ EFX_FILTER_FLAG_RX_RSS : 0));
new_spec.dmaq_id = 0;
new_spec.rss_context = EFX_FILTER_RSS_CONTEXT_DEFAULT;
rc = efx_ef10_filter_push(efx, &new_spec,
@@ -3921,6 +3922,7 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
{
struct efx_ef10_filter_table *table = efx->filter_state;
struct efx_ef10_dev_addr *addr_list;
+ enum efx_filter_flags filter_flags;
struct efx_filter_spec spec;
u8 baddr[ETH_ALEN];
unsigned int i, j;
@@ -3935,11 +3937,11 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
addr_count = table->dev_uc_count;
}
+ filter_flags = efx_rss_enabled(efx) ? EFX_FILTER_FLAG_RX_RSS : 0;
+
/* Insert/renew filters */
for (i = 0; i < addr_count; i++) {
- efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO,
- EFX_FILTER_FLAG_RX_RSS,
- 0);
+ efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
efx_filter_set_eth_local(&spec, EFX_FILTER_VID_UNSPEC,
addr_list[i].addr);
rc = efx_ef10_filter_insert(efx, &spec, true);
@@ -3968,9 +3970,7 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
if (multicast && rollback) {
/* Also need an Ethernet broadcast filter */
- efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO,
- EFX_FILTER_FLAG_RX_RSS,
- 0);
+ efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
eth_broadcast_addr(baddr);
efx_filter_set_eth_local(&spec, EFX_FILTER_VID_UNSPEC, baddr);
rc = efx_ef10_filter_insert(efx, &spec, true);
@@ -4000,13 +4000,14 @@ static int efx_ef10_filter_insert_def(struct efx_nic *efx, bool multicast,
{
struct efx_ef10_filter_table *table = efx->filter_state;
struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ enum efx_filter_flags filter_flags;
struct efx_filter_spec spec;
u8 baddr[ETH_ALEN];
int rc;
- efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO,
- EFX_FILTER_FLAG_RX_RSS,
- 0);
+ filter_flags = efx_rss_enabled(efx) ? EFX_FILTER_FLAG_RX_RSS : 0;
+
+ efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
if (multicast)
efx_filter_set_mc_def(&spec);
@@ -4023,8 +4024,7 @@ static int efx_ef10_filter_insert_def(struct efx_nic *efx, bool multicast,
if (!nic_data->workaround_26807) {
/* Also need an Ethernet broadcast filter */
efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO,
- EFX_FILTER_FLAG_RX_RSS,
- 0);
+ filter_flags, 0);
eth_broadcast_addr(baddr);
efx_filter_set_eth_local(&spec, EFX_FILTER_VID_UNSPEC,
baddr);
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index d288f1c928de..a3c42a376741 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -3422,7 +3422,7 @@ out:
* with our request for slot reset the mmio_enabled callback will never be
* called, and the link_reset callback is not used by AER or EEH mechanisms.
*/
-static struct pci_error_handlers efx_err_handlers = {
+static const struct pci_error_handlers efx_err_handlers = {
.error_detected = efx_io_error_detected,
.slot_reset = efx_io_slot_reset,
.resume = efx_io_resume,
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 1aaf76c1ace8..10827476bc0b 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -76,6 +76,11 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
#define EFX_TXQ_MAX_ENT(efx) (EFX_WORKAROUND_35388(efx) ? \
EFX_MAX_DMAQ_SIZE / 2 : EFX_MAX_DMAQ_SIZE)
+static inline bool efx_rss_enabled(struct efx_nic *efx)
+{
+ return efx->rss_spread > 1;
+}
+
/* Filters */
void efx_mac_reconfigure(struct efx_nic *efx);
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 5a1c5a8f278a..133e9e35be9e 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -2242,7 +2242,7 @@ efx_farch_filter_init_rx_auto(struct efx_nic *efx,
*/
spec->priority = EFX_FILTER_PRI_AUTO;
spec->flags = (EFX_FILTER_FLAG_RX |
- (efx->n_rx_channels > 1 ? EFX_FILTER_FLAG_RX_RSS : 0) |
+ (efx_rss_enabled(efx) ? EFX_FILTER_FLAG_RX_RSS : 0) |
(efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0));
spec->dmaq_id = 0;
}
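
The sfc hunks above converge on one idea: compute the RSS decision once, through a small predicate helper, instead of hard-coding EFX_FILTER_FLAG_RX_RSS at every insertion site. A minimal userspace sketch of that shape, with illustrative names rather than the driver's real API:

#include <stdio.h>

enum filter_flags { FLAG_RX_RSS = 1 << 0, FLAG_RX_SCATTER = 1 << 1 };

struct nic { unsigned int rss_spread; int rx_scatter; };

/* single source of truth for "is RSS in play?" */
static int rss_enabled(const struct nic *n) { return n->rss_spread > 1; }

static enum filter_flags rx_filter_flags(const struct nic *n)
{
	return (rss_enabled(n) ? FLAG_RX_RSS : 0) |
	       (n->rx_scatter ? FLAG_RX_SCATTER : 0);
}

int main(void)
{
	struct nic single = { .rss_spread = 1, .rx_scatter = 0 };
	struct nic multi  = { .rss_spread = 8, .rx_scatter = 1 };

	printf("single queue: %#x\n", rx_filter_flags(&single)); /* 0 */
	printf("multi queue:  %#x\n", rx_filter_flags(&multi));  /* 0x3 */
	return 0;
}
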
diff --git a/drivers/net/ethernet/sfc/txc43128_phy.c b/drivers/net/ethernet/sfc/txc43128_phy.c
index 3d5ee3259885..194f67d9f3bf 100644
--- a/drivers/net/ethernet/sfc/txc43128_phy.c
+++ b/drivers/net/ethernet/sfc/txc43128_phy.c
@@ -418,7 +418,7 @@ static void txc_reset_logic_mmd(struct efx_nic *efx, int mmd)
val |= (1 << TXC_GLCMD_LMTSWRST_LBN);
efx_mdio_write(efx, mmd, TXC_GLRGS_GLCMD, val);
- while (tries--) {
+ while (--tries) {
val = efx_mdio_read(efx, mmd, TXC_GLRGS_GLCMD);
if (!(val & (1 << TXC_GLCMD_LMTSWRST_LBN)))
break;
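
The txc43128 fix swaps tries-- for --tries so the counter reads 0, not -1, when the retry loop exhausts; that matters because timeout handling after such loops typically tests !tries. A small demo of the difference:

#include <stdio.h>

static int drain_post(int tries) /* while (tries--): runs |tries| times, ends at -1 */
{
	while (tries--)
		;
	return tries;
}

static int drain_pre(int tries) /* while (--tries): runs tries-1 times, ends at 0 */
{
	while (--tries)
		;
	return tries;
}

int main(void)
{
	/* after a timed-out poll loop, code like "if (!tries) warn()"
	 * only works with the pre-decrement form */
	printf("post-decrement leaves tries = %d\n", drain_post(3)); /* -1 */
	printf("pre-decrement leaves tries  = %d\n", drain_pre(3));  /*  0 */
	return 0;
}
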
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index c860c9007e49..219a99b7a631 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -809,22 +809,17 @@ static int smsc911x_phy_check_loopbackpkt(struct smsc911x_data *pdata)
static int smsc911x_phy_reset(struct smsc911x_data *pdata)
{
- struct phy_device *phy_dev = pdata->phy_dev;
unsigned int temp;
unsigned int i = 100000;
- BUG_ON(!phy_dev);
- BUG_ON(!phy_dev->bus);
-
- SMSC_TRACE(pdata, hw, "Performing PHY BCR Reset");
- smsc911x_mii_write(phy_dev->bus, phy_dev->addr, MII_BMCR, BMCR_RESET);
+ temp = smsc911x_reg_read(pdata, PMT_CTRL);
+ smsc911x_reg_write(pdata, PMT_CTRL, temp | PMT_CTRL_PHY_RST_);
do {
msleep(1);
- temp = smsc911x_mii_read(phy_dev->bus, phy_dev->addr,
- MII_BMCR);
- } while ((i--) && (temp & BMCR_RESET));
+ temp = smsc911x_reg_read(pdata, PMT_CTRL);
+ } while ((i--) && (temp & PMT_CTRL_PHY_RST_));
- if (temp & BMCR_RESET) {
+ if (unlikely(temp & PMT_CTRL_PHY_RST_)) {
SMSC_WARN(pdata, hw, "PHY reset failed to complete");
return -EIO;
}
@@ -2296,7 +2291,7 @@ static int smsc911x_init(struct net_device *dev)
}
/* Reset the LAN911x */
- if (smsc911x_soft_reset(pdata))
+ if (smsc911x_phy_reset(pdata) || smsc911x_soft_reset(pdata))
return -ENODEV;
dev->flags |= IFF_MULTICAST;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 9d89bdbf029f..82de68b1a452 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -337,11 +337,11 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
QSGMII_PHY_RX_SIGNAL_DETECT_EN |
QSGMII_PHY_TX_DRIVER_EN |
QSGMII_PHY_QSGMII_EN |
- 0x4 << QSGMII_PHY_PHASE_LOOP_GAIN_OFFSET |
- 0x3 << QSGMII_PHY_RX_DC_BIAS_OFFSET |
- 0x1 << QSGMII_PHY_RX_INPUT_EQU_OFFSET |
- 0x2 << QSGMII_PHY_CDR_PI_SLEW_OFFSET |
- 0xC << QSGMII_PHY_TX_DRV_AMP_OFFSET);
+ 0x4ul << QSGMII_PHY_PHASE_LOOP_GAIN_OFFSET |
+ 0x3ul << QSGMII_PHY_RX_DC_BIAS_OFFSET |
+ 0x1ul << QSGMII_PHY_RX_INPUT_EQU_OFFSET |
+ 0x2ul << QSGMII_PHY_CDR_PI_SLEW_OFFSET |
+ 0xCul << QSGMII_PHY_TX_DRV_AMP_OFFSET);
}
plat_dat->has_gmac = true;
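
The ipq806x hunk adds ul suffixes because shifting a plain int constant such as 0xC far enough left pushes a set bit into the sign bit of a 32-bit int, which is undefined behaviour in C; promoting the constant to unsigned long first is well defined. A compilable illustration (the offset value here is an assumption, chosen high enough to show the issue):

#include <stdio.h>

#define TX_DRV_AMP_OFFSET 28 /* illustrative; any offset >= 28 triggers the issue */

int main(void)
{
	/* 0xC << 28 would overflow signed int: undefined behaviour.
	 * With the ul suffix the shift happens in unsigned long. */
	unsigned long val = 0xCul << TX_DRV_AMP_OFFSET;

	printf("%#lx\n", val); /* 0xc0000000 */
	return 0;
}
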
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index 7f6f4a4fcc70..58c05acc2aab 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -299,16 +299,17 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac,
if (IS_PHY_IF_MODE_GBIT(dwmac->interface)) {
const char *rs;
+ dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN;
+
err = of_property_read_string(np, "st,tx-retime-src", &rs);
if (err < 0) {
dev_warn(dev, "Use internal clock source\n");
- dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN;
- } else if (!strcasecmp(rs, "clk_125")) {
- dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125;
- } else if (!strcasecmp(rs, "txclk")) {
- dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK;
+ } else {
+ if (!strcasecmp(rs, "clk_125"))
+ dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125;
+ else if (!strcasecmp(rs, "txclk"))
+ dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK;
}
-
dwmac->speed = SPEED_1000;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
index 52b8ed9bd87c..adff46375a32 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
@@ -153,7 +153,11 @@ static int sun7i_gmac_probe(struct platform_device *pdev)
if (ret)
return ret;
- return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+ ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+ if (ret)
+ sun7i_gmac_exit(pdev, plat_dat->bsp_priv);
+
+ return ret;
}
static const struct of_device_id sun7i_dwmac_match[] = {
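
sun7i_gmac_probe() now undoes its own setup when stmmac_dvr_probe() fails instead of returning with the BSP half-initialised. The general "unwind what you did" probe shape, sketched with mock init/exit steps:

#include <stdio.h>

#define ENODEV 19

static int bsp_init(void)     { puts("bsp init"); return 0; }
static void bsp_exit(void)    { puts("bsp exit"); }
static int core_probe(int ok) { return ok ? 0 : -ENODEV; }

static int probe(int core_ok)
{
	int ret = bsp_init();

	if (ret)
		return ret;

	ret = core_probe(core_ok);
	if (ret)
		bsp_exit(); /* failure after init: undo it before returning */

	return ret;
}

int main(void)
{
	printf("probe -> %d\n", probe(1));
	printf("probe -> %d\n", probe(0)); /* prints "bsp exit" on the way out */
	return 0;
}
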
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 64d8aa4e0cad..a5b869eb4678 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -185,7 +185,7 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv)
priv->clk_csr = STMMAC_CSR_100_150M;
else if ((clk_rate >= CSR_F_150M) && (clk_rate < CSR_F_250M))
priv->clk_csr = STMMAC_CSR_150_250M;
- else if ((clk_rate >= CSR_F_250M) && (clk_rate < CSR_F_300M))
+ else if ((clk_rate >= CSR_F_250M) && (clk_rate <= CSR_F_300M))
priv->clk_csr = STMMAC_CSR_250_300M;
}
}
@@ -2232,6 +2232,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
frame_len = priv->hw->desc->get_rx_frame_len(p, coe);
+ /* check if frame_len fits the preallocated memory */
+ if (frame_len > priv->dma_buf_sz) {
+ priv->dev->stats.rx_length_errors++;
+ break;
+ }
+
/* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3
* Type frames (LLC/LLC-SNAP)
*/
@@ -3040,8 +3046,6 @@ int stmmac_suspend(struct net_device *ndev)
priv->hw->dma->stop_tx(priv->ioaddr);
priv->hw->dma->stop_rx(priv->ioaddr);
- stmmac_clear_descriptors(priv);
-
/* Enable Power down mode by programming the PMT regs */
if (device_may_wakeup(priv->device)) {
priv->hw->mac->pmt(priv->hw, priv->wolopts);
@@ -3099,9 +3103,15 @@ int stmmac_resume(struct net_device *ndev)
netif_device_attach(ndev);
- init_dma_desc_rings(ndev, GFP_ATOMIC);
+ priv->cur_rx = 0;
+ priv->dirty_rx = 0;
+ priv->dirty_tx = 0;
+ priv->cur_tx = 0;
+ stmmac_clear_descriptors(priv);
+
stmmac_hw_setup(ndev, false);
stmmac_init_tx_coalesce(priv);
+ stmmac_set_rx_mode(ndev);
napi_enable(&priv->napi);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index ebf6abc4853f..bba670c42e37 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -138,7 +138,6 @@ int stmmac_mdio_reset(struct mii_bus *bus)
#ifdef CONFIG_OF
if (priv->device->of_node) {
- int reset_gpio, active_low;
if (data->reset_gpio < 0) {
struct device_node *np = priv->device->of_node;
@@ -154,24 +153,23 @@ int stmmac_mdio_reset(struct mii_bus *bus)
"snps,reset-active-low");
of_property_read_u32_array(np,
"snps,reset-delays-us", data->delays, 3);
- }
- reset_gpio = data->reset_gpio;
- active_low = data->active_low;
+ if (gpio_request(data->reset_gpio, "mdio-reset"))
+ return 0;
+ }
- if (!gpio_request(reset_gpio, "mdio-reset")) {
- gpio_direction_output(reset_gpio, active_low ? 1 : 0);
- if (data->delays[0])
- msleep(DIV_ROUND_UP(data->delays[0], 1000));
+ gpio_direction_output(data->reset_gpio,
+ data->active_low ? 1 : 0);
+ if (data->delays[0])
+ msleep(DIV_ROUND_UP(data->delays[0], 1000));
- gpio_set_value(reset_gpio, active_low ? 0 : 1);
- if (data->delays[1])
- msleep(DIV_ROUND_UP(data->delays[1], 1000));
+ gpio_set_value(data->reset_gpio, data->active_low ? 0 : 1);
+ if (data->delays[1])
+ msleep(DIV_ROUND_UP(data->delays[1], 1000));
- gpio_set_value(reset_gpio, active_low ? 1 : 0);
- if (data->delays[2])
- msleep(DIV_ROUND_UP(data->delays[2], 1000));
- }
+ gpio_set_value(data->reset_gpio, data->active_low ? 1 : 0);
+ if (data->delays[2])
+ msleep(DIV_ROUND_UP(data->delays[2], 1000));
}
#endif
diff --git a/drivers/net/ethernet/ti/cpsw-common.c b/drivers/net/ethernet/ti/cpsw-common.c
index c08be62bceba..1562ab4151e1 100644
--- a/drivers/net/ethernet/ti/cpsw-common.c
+++ b/drivers/net/ethernet/ti/cpsw-common.c
@@ -78,6 +78,9 @@ static int cpsw_am33xx_cm_get_macid(struct device *dev, u16 offset, int slave,
int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr)
{
+ if (of_machine_is_compatible("ti,dm8148"))
+ return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr);
+
if (of_machine_is_compatible("ti,am33xx"))
return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr);
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 48b92c9de12a..fc958067d10a 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2026,45 +2026,54 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
for_each_child_of_node(node, slave_node) {
struct cpsw_slave_data *slave_data = data->slave_data + i;
const void *mac_addr = NULL;
- u32 phyid;
int lenp;
const __be32 *parp;
- struct device_node *mdio_node;
- struct platform_device *mdio;
/* This is no slave child node, continue */
if (strcmp(slave_node->name, "slave"))
continue;
priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0);
+ parp = of_get_property(slave_node, "phy_id", &lenp);
if (of_phy_is_fixed_link(slave_node)) {
- struct phy_device *pd;
+ struct device_node *phy_node;
+ struct phy_device *phy_dev;
+ /* In the case of a fixed PHY, the DT node associated
+ * with the PHY is the Ethernet MAC DT node.
+ */
ret = of_phy_register_fixed_link(slave_node);
if (ret)
return ret;
- pd = of_phy_find_device(slave_node);
- if (!pd)
+ phy_node = of_node_get(slave_node);
+ phy_dev = of_phy_find_device(phy_node);
+ if (!phy_dev)
return -ENODEV;
snprintf(slave_data->phy_id, sizeof(slave_data->phy_id),
- PHY_ID_FMT, pd->bus->id, pd->phy_id);
- goto no_phy_slave;
- }
- parp = of_get_property(slave_node, "phy_id", &lenp);
- if ((parp == NULL) || (lenp != (sizeof(void *) * 2))) {
- dev_err(&pdev->dev, "Missing slave[%d] phy_id property\n", i);
+ PHY_ID_FMT, phy_dev->bus->id, phy_dev->addr);
+ } else if (parp) {
+ u32 phyid;
+ struct device_node *mdio_node;
+ struct platform_device *mdio;
+
+ if (lenp != (sizeof(__be32) * 2)) {
+ dev_err(&pdev->dev, "Invalid slave[%d] phy_id property\n", i);
+ goto no_phy_slave;
+ }
+ mdio_node = of_find_node_by_phandle(be32_to_cpup(parp));
+ phyid = be32_to_cpup(parp+1);
+ mdio = of_find_device_by_node(mdio_node);
+ of_node_put(mdio_node);
+ if (!mdio) {
+ dev_err(&pdev->dev, "Missing mdio platform device\n");
+ return -EINVAL;
+ }
+ snprintf(slave_data->phy_id, sizeof(slave_data->phy_id),
+ PHY_ID_FMT, mdio->name, phyid);
+ } else {
+ dev_err(&pdev->dev, "No slave[%d] phy_id or fixed-link property\n", i);
goto no_phy_slave;
}
- mdio_node = of_find_node_by_phandle(be32_to_cpup(parp));
- phyid = be32_to_cpup(parp+1);
- mdio = of_find_device_by_node(mdio_node);
- of_node_put(mdio_node);
- if (!mdio) {
- dev_err(&pdev->dev, "Missing mdio platform device\n");
- return -EINVAL;
- }
- snprintf(slave_data->phy_id, sizeof(slave_data->phy_id),
- PHY_ID_FMT, mdio->name, phyid);
slave_data->phy_if = of_get_phy_mode(slave_node);
if (slave_data->phy_if < 0) {
dev_err(&pdev->dev, "Missing or malformed slave[%d] phy-mode property\n",
@@ -2418,7 +2427,7 @@ static int cpsw_probe(struct platform_device *pdev)
ndev->irq = platform_get_irq(pdev, 1);
if (ndev->irq < 0) {
dev_err(priv->dev, "error getting irq resource\n");
- ret = -ENOENT;
+ ret = ndev->irq;
goto clean_ale_ret;
}
@@ -2439,8 +2448,10 @@ static int cpsw_probe(struct platform_device *pdev)
/* RX IRQ */
irq = platform_get_irq(pdev, 1);
- if (irq < 0)
+ if (irq < 0) {
+ ret = irq;
goto clean_ale_ret;
+ }
priv->irqs_table[0] = irq;
ret = devm_request_irq(&pdev->dev, irq, cpsw_rx_interrupt,
@@ -2452,8 +2463,10 @@ static int cpsw_probe(struct platform_device *pdev)
/* TX IRQ */
irq = platform_get_irq(pdev, 2);
- if (irq < 0)
+ if (irq < 0) {
+ ret = irq;
goto clean_ale_ret;
+ }
priv->irqs_table[1] = irq;
ret = devm_request_irq(&pdev->dev, irq, cpsw_tx_interrupt,
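
The cpsw hunks stop replacing platform_get_irq()'s return value with a hard-coded -ENOENT; forwarding the original negative errno matters because values such as -EPROBE_DEFER carry meaning for the driver core. A mock illustration (the helper and errno value are stand-ins, not the platform API):

#include <stdio.h>

#define EPROBE_DEFER 517

/* stand-in for platform_get_irq(): valid number or negative errno */
static int mock_get_irq(int present) { return present ? 42 : -EPROBE_DEFER; }

static int probe(int present)
{
	int irq = mock_get_irq(present);

	if (irq < 0)
		return irq; /* forward the errno; don't flatten it to -ENOENT */
	printf("using irq %d\n", irq);
	return 0;
}

int main(void)
{
	printf("probe -> %d\n", probe(1));
	printf("probe -> %d (deferral preserved)\n", probe(0));
	return 0;
}
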
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index ae68afd50a15..f38696ceee74 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -345,13 +345,6 @@ VELOCITY_PARAM(flow_control, "Enable flow control ability");
*/
VELOCITY_PARAM(speed_duplex, "Setting the speed and duplex mode");
-#define VAL_PKT_LEN_DEF 0
-/* ValPktLen[] is used for setting the checksum offload ability of NIC.
- 0: Receive frame with invalid layer 2 length (Default)
- 1: Drop frame with invalid layer 2 length
-*/
-VELOCITY_PARAM(ValPktLen, "Receiving or Drop invalid 802.3 frame");
-
#define WOL_OPT_DEF 0
#define WOL_OPT_MIN 0
#define WOL_OPT_MAX 7
@@ -494,7 +487,6 @@ static void velocity_get_options(struct velocity_opt *opts, int index,
velocity_set_int_opt(&opts->flow_cntl, flow_control[index], FLOW_CNTL_MIN, FLOW_CNTL_MAX, FLOW_CNTL_DEF, "flow_control", devname);
velocity_set_bool_opt(&opts->flags, IP_byte_align[index], IP_ALIG_DEF, VELOCITY_FLAGS_IP_ALIGN, "IP_byte_align", devname);
- velocity_set_bool_opt(&opts->flags, ValPktLen[index], VAL_PKT_LEN_DEF, VELOCITY_FLAGS_VAL_PKT_LEN, "ValPktLen", devname);
velocity_set_int_opt((int *) &opts->spd_dpx, speed_duplex[index], MED_LNK_MIN, MED_LNK_MAX, MED_LNK_DEF, "Media link mode", devname);
velocity_set_int_opt(&opts->wol_opts, wol_opts[index], WOL_OPT_MIN, WOL_OPT_MAX, WOL_OPT_DEF, "Wake On Lan options", devname);
opts->numrx = (opts->numrx & ~3);
@@ -2055,8 +2047,9 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx)
int pkt_len = le16_to_cpu(rd->rdesc0.len) & 0x3fff;
struct sk_buff *skb;
- if (rd->rdesc0.RSR & (RSR_STP | RSR_EDP)) {
- VELOCITY_PRT(MSG_LEVEL_VERBOSE, KERN_ERR " %s : the received frame spans multiple RDs.\n", vptr->netdev->name);
+ if (unlikely(rd->rdesc0.RSR & (RSR_STP | RSR_EDP | RSR_RL))) {
+ if (rd->rdesc0.RSR & (RSR_STP | RSR_EDP))
+ VELOCITY_PRT(MSG_LEVEL_VERBOSE, KERN_ERR " %s : the received frame spans multiple RDs.\n", vptr->netdev->name);
stats->rx_length_errors++;
return -EINVAL;
}
@@ -2069,17 +2062,6 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx)
dma_sync_single_for_cpu(vptr->dev, rd_info->skb_dma,
vptr->rx.buf_sz, DMA_FROM_DEVICE);
- /*
- * Drop frame not meeting IEEE 802.3
- */
-
- if (vptr->flags & VELOCITY_FLAGS_VAL_PKT_LEN) {
- if (rd->rdesc0.RSR & RSR_RL) {
- stats->rx_length_errors++;
- return -EINVAL;
- }
- }
-
velocity_rx_csum(rd, skb);
if (velocity_rx_copy(&skb, pkt_len, vptr) < 0) {
diff --git a/drivers/net/fjes/fjes_hw.c b/drivers/net/fjes/fjes_hw.c
index bb8b5304d851..b103adb8d62e 100644
--- a/drivers/net/fjes/fjes_hw.c
+++ b/drivers/net/fjes/fjes_hw.c
@@ -599,7 +599,7 @@ int fjes_hw_unregister_buff_addr(struct fjes_hw *hw, int dest_epid)
FJES_CMD_REQ_RES_CODE_BUSY) &&
(timeout > 0)) {
msleep(200 + hw->my_epid * 20);
- timeout -= (200 + hw->my_epid * 20);
+ timeout -= (200 + hw->my_epid * 20);
res_buf->unshare_buffer.length = 0;
res_buf->unshare_buffer.code = 0;
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index de5c30c9f059..58efdec12f30 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -967,8 +967,6 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
err = udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
&fl6.saddr, &fl6.daddr, prio, ttl,
sport, geneve->dst_port, !udp_csum);
-
- iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;
tx_error:
@@ -1157,7 +1155,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_dev *t, *geneve = netdev_priv(dev);
bool tun_collect_md, tun_on_same_port;
- int err;
+ int err, encap_len;
if (!remote)
return -EINVAL;
@@ -1189,6 +1187,14 @@ static int geneve_configure(struct net *net, struct net_device *dev,
if (t)
return -EBUSY;
+ /* make enough headroom for the basic scenario */
+ encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
+ if (remote->sa.sa_family == AF_INET)
+ encap_len += sizeof(struct iphdr);
+ else
+ encap_len += sizeof(struct ipv6hdr);
+ dev->needed_headroom = encap_len + ETH_HLEN;
+
if (metadata) {
if (tun_on_same_port)
return -EPERM;
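
geneve_configure() now sizes needed_headroom up front so the transmit path doesn't have to reallocate skb headroom per packet. The arithmetic in isolation, assuming as in the driver that GENEVE_BASE_HLEN covers the UDP plus Geneve headers:

#include <stdio.h>

#define ETH_HLEN         14
#define GENEVE_BASE_HLEN (8 /* UDP */ + 8 /* Geneve, no options */)

static int needed_headroom(int ipv6)
{
	int encap_len = GENEVE_BASE_HLEN + ETH_HLEN;

	encap_len += ipv6 ? 40 : 20; /* outer IPv6 or IPv4 header */
	return encap_len + ETH_HLEN; /* as in the patch: one more MAC header */
}

int main(void)
{
	printf("IPv4 headroom: %d bytes\n", needed_headroom(0)); /* 64 */
	printf("IPv6 headroom: %d bytes\n", needed_headroom(1)); /* 84 */
	return 0;
}
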
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 7c4a4151ef0f..5a1e98547031 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -683,14 +683,20 @@ static void sixpack_close(struct tty_struct *tty)
if (!atomic_dec_and_test(&sp->refcnt))
down(&sp->dead_sem);
- unregister_netdev(sp->dev);
+ /* We must stop the queue to avoid potentially scribbling
+ * on the free buffers. The sp->dead_sem is not sufficient
+ * to protect us from sp->xbuff access.
+ */
+ netif_stop_queue(sp->dev);
- del_timer(&sp->tx_t);
- del_timer(&sp->resync_t);
+ del_timer_sync(&sp->tx_t);
+ del_timer_sync(&sp->resync_t);
/* Free all 6pack frame buffers. */
kfree(sp->rbuff);
kfree(sp->xbuff);
+
+ unregister_netdev(sp->dev);
}
/* Perform I/O control on an active 6pack channel. */
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index 216bfd350169..85828f153445 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -797,14 +797,19 @@ static void mkiss_close(struct tty_struct *tty)
*/
if (!atomic_dec_and_test(&ax->refcnt))
down(&ax->dead_sem);
-
- unregister_netdev(ax->dev);
+ /*
+ * Halt the transmit queue so that a new transmit cannot scribble
+ * on our buffers
+ */
+ netif_stop_queue(ax->dev);
/* Free all AX25 frame buffers. */
kfree(ax->rbuff);
kfree(ax->xbuff);
ax->tty = NULL;
+
+ unregister_netdev(ax->dev);
}
/* Perform I/O control on an active ax25 channel. */
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index d50887e3df6d..8c48bb2a94ea 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -254,7 +254,7 @@ acct:
}
}
-static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb,
+static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
bool local)
{
struct ipvl_dev *ipvlan = addr->master;
@@ -262,6 +262,7 @@ static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb,
unsigned int len;
rx_handler_result_t ret = RX_HANDLER_CONSUMED;
bool success = false;
+ struct sk_buff *skb = *pskb;
len = skb->len + ETH_HLEN;
if (unlikely(!(dev->flags & IFF_UP))) {
@@ -273,6 +274,7 @@ static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb,
if (!skb)
goto out;
+ *pskb = skb;
skb->dev = dev;
skb->pkt_type = PACKET_HOST;
@@ -486,7 +488,7 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
if (addr)
- return ipvlan_rcv_frame(addr, skb, true);
+ return ipvlan_rcv_frame(addr, &skb, true);
out:
skb->dev = ipvlan->phy_dev;
@@ -506,7 +508,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
if (lyr3h) {
addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
if (addr)
- return ipvlan_rcv_frame(addr, skb, true);
+ return ipvlan_rcv_frame(addr, &skb, true);
}
skb = skb_share_check(skb, GFP_ATOMIC);
if (!skb)
@@ -589,7 +591,7 @@ static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
if (addr)
- ret = ipvlan_rcv_frame(addr, skb, false);
+ ret = ipvlan_rcv_frame(addr, pskb, false);
out:
return ret;
@@ -626,7 +628,7 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
if (addr)
- ret = ipvlan_rcv_frame(addr, skb, false);
+ ret = ipvlan_rcv_frame(addr, pskb, false);
}
return ret;
@@ -651,5 +653,5 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n",
port->mode);
kfree_skb(skb);
- return NET_RX_DROP;
+ return RX_HANDLER_CONSUMED;
}
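
The ipvlan change (and the macvlan one below) enforce the rx-handler contract: any helper that may reallocate the skb, e.g. via skb_share_check() or defragmentation, must write the new pointer back through the double pointer, or the caller keeps touching freed memory. A reduced mock of that contract:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct skb { char data[16]; };

/* stand-in for skb_share_check(): may replace the buffer with a clone */
static struct skb *share_check(struct skb *skb, int shared)
{
	struct skb *clone;

	if (!shared)
		return skb;
	clone = malloc(sizeof(*clone));
	if (!clone)
		return NULL;
	memcpy(clone, skb, sizeof(*clone));
	free(skb); /* old buffer is gone: any saved pointer is now stale */
	return clone;
}

static int rcv_frame(struct skb **pskb, int shared)
{
	struct skb *skb = share_check(*pskb, shared);

	if (!skb)
		return -1;
	*pskb = skb; /* write back so the caller never sees the freed buffer */
	return 0;
}

int main(void)
{
	struct skb *skb = malloc(sizeof(*skb));

	if (!skb)
		return 1;
	strcpy(skb->data, "frame");
	if (rcv_frame(&skb, 1) == 0)
		printf("caller sees: %s\n", skb->data);
	free(skb);
	return 0;
}
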
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 86f6c6292c27..06c8bfeaccd6 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -415,6 +415,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
skb = ip_check_defrag(dev_net(skb->dev), skb, IP_DEFRAG_MACVLAN);
if (!skb)
return RX_HANDLER_CONSUMED;
+ *pskb = skb;
eth = eth_hdr(skb);
macvlan_forward_source(skb, port, eth->h_source);
src = macvlan_hash_lookup(port, eth->h_source);
@@ -456,6 +457,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
goto out;
}
+ *pskb = skb;
skb->dev = dev;
skb->pkt_type = PACKET_HOST;
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 54036ae0a388..0fc521941c71 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -498,7 +498,7 @@ static void macvtap_sock_write_space(struct sock *sk)
wait_queue_head_t *wqueue;
if (!sock_writeable(sk) ||
- !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
+ !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags))
return;
wqueue = sk_sleep(sk);
@@ -585,7 +585,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait)
mask |= POLLIN | POLLRDNORM;
if (sock_writeable(&q->sk) ||
- (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock.flags) &&
+ (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock.flags) &&
sock_writeable(&q->sk)))
mask |= POLLOUT | POLLWRNORM;
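
macvtap (and tun below) keep the check/arm/re-check shape while switching to the renamed SOCKWQ_ASYNC_NOSPACE bit: report writable if the socket is writable now, or if it became writable between arming the bit and looking again, so a wakeup racing the first test is not lost. A single-threaded mock of the shape, not the kernel API:

#include <stdio.h>

static int writable;
static int nospace_armed;

static int test_and_set(int *bit)
{
	int old = *bit;

	*bit = 1;
	return old;
}

static int poll_writable(void)
{
	/* writable now, or writable after arming the wakeup bit: report it;
	 * otherwise the armed bit guarantees a later wakeup */
	return writable ||
	       (!test_and_set(&nospace_armed) && writable);
}

int main(void)
{
	printf("not writable: %d\n", poll_writable()); /* 0, bit now armed */
	writable = 1;
	printf("writable:     %d\n", poll_writable()); /* 1 */
	return 0;
}
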
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index fabf11d32d27..2d020a3ec0b5 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -308,6 +308,8 @@ static struct phy_driver at803x_driver[] = {
.flags = PHY_HAS_INTERRUPT,
.config_aneg = genphy_config_aneg,
.read_status = genphy_read_status,
+ .ack_interrupt = at803x_ack_interrupt,
+ .config_intr = at803x_config_intr,
.driver = {
.owner = THIS_MODULE,
},
@@ -327,6 +329,8 @@ static struct phy_driver at803x_driver[] = {
.flags = PHY_HAS_INTERRUPT,
.config_aneg = genphy_config_aneg,
.read_status = genphy_read_status,
+ .ack_interrupt = at803x_ack_interrupt,
+ .config_intr = at803x_config_intr,
.driver = {
.owner = THIS_MODULE,
},
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 07a6119121c3..3ce5d9514623 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -614,7 +614,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = {
{ PHY_ID_BCM5461, 0xfffffff0 },
{ PHY_ID_BCM54616S, 0xfffffff0 },
{ PHY_ID_BCM5464, 0xfffffff0 },
- { PHY_ID_BCM5482, 0xfffffff0 },
+ { PHY_ID_BCM5481, 0xfffffff0 },
{ PHY_ID_BCM5482, 0xfffffff0 },
{ PHY_ID_BCM50610, 0xfffffff0 },
{ PHY_ID_BCM50610M, 0xfffffff0 },
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 5de8d5827536..0240552b50f3 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1154,6 +1154,21 @@ static struct phy_driver marvell_drivers[] = {
.driver = { .owner = THIS_MODULE },
},
{
+ .phy_id = MARVELL_PHY_ID_88E1540,
+ .phy_id_mask = MARVELL_PHY_ID_MASK,
+ .name = "Marvell 88E1540",
+ .features = PHY_GBIT_FEATURES,
+ .flags = PHY_HAS_INTERRUPT,
+ .config_aneg = &m88e1510_config_aneg,
+ .read_status = &marvell_read_status,
+ .ack_interrupt = &marvell_ack_interrupt,
+ .config_intr = &marvell_config_intr,
+ .did_interrupt = &m88e1121_did_interrupt,
+ .resume = &genphy_resume,
+ .suspend = &genphy_suspend,
+ .driver = { .owner = THIS_MODULE },
+ },
+ {
.phy_id = MARVELL_PHY_ID_88E3016,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E3016",
@@ -1186,6 +1201,7 @@ static struct mdio_device_id __maybe_unused marvell_tbl[] = {
{ MARVELL_PHY_ID_88E1318S, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E1116R, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E1510, MARVELL_PHY_ID_MASK },
+ { MARVELL_PHY_ID_88E1540, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E3016, MARVELL_PHY_ID_MASK },
{ }
};
diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c
index 908e8d486342..7f8e7662e28c 100644
--- a/drivers/net/phy/mdio-mux.c
+++ b/drivers/net/phy/mdio-mux.c
@@ -149,9 +149,14 @@ int mdio_mux_init(struct device *dev,
}
cb->bus_number = v;
cb->parent = pb;
+
cb->mii_bus = mdiobus_alloc();
+ if (!cb->mii_bus) {
+ ret_val = -ENOMEM;
+ of_node_put(child_bus_node);
+ break;
+ }
cb->mii_bus->priv = cb;
-
cb->mii_bus->irq = cb->phy_irq;
cb->mii_bus->name = "mdio_mux";
snprintf(cb->mii_bus->id, MII_BUS_ID_SIZE, "%x.%x",
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index cf6312fafea5..e13ad6cdcc22 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -339,9 +339,18 @@ static int ksz9021_config_init(struct phy_device *phydev)
{
const struct device *dev = &phydev->dev;
const struct device_node *of_node = dev->of_node;
+ const struct device *dev_walker;
- if (!of_node && dev->parent->of_node)
- of_node = dev->parent->of_node;
+ /* The Micrel driver has a deprecated option to place phy OF
+ * properties in the MAC node. Walk up the tree of devices to
+ * find a device with an OF node.
+ */
+ dev_walker = &phydev->dev;
+ do {
+ of_node = dev_walker->of_node;
+ dev_walker = dev_walker->parent;
+
+ } while (!of_node && dev_walker);
if (of_node) {
ksz9021_load_values_from_of(phydev, of_node,
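
The micrel change generalises "fall back to the parent's OF node" into a walk up the device hierarchy. The same loop in miniature, with a toy device type standing in for struct device:

#include <stdio.h>

struct device {
	struct device *parent;
	const char *of_node; /* toy stand-in for a struct device_node * */
};

static const char *find_of_node(const struct device *dev)
{
	const char *node = NULL;

	/* walk towards the root until some ancestor carries an OF node */
	while (dev && !(node = dev->of_node))
		dev = dev->parent;
	return node;
}

int main(void)
{
	struct device mac = { .parent = NULL, .of_node = "mac-node" };
	struct device phy = { .parent = &mac, .of_node = NULL };

	printf("resolved: %s\n", find_of_node(&phy)); /* "mac-node" */
	return 0;
}
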
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index adb48abafc87..47cd306dbb3c 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -448,7 +448,8 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
mdiobus_write(phydev->bus, mii_data->phy_id,
mii_data->reg_num, val);
- if (mii_data->reg_num == MII_BMCR &&
+ if (mii_data->phy_id == phydev->addr &&
+ mii_data->reg_num == MII_BMCR &&
val & BMCR_RESET)
return phy_init_hw(phydev);
@@ -863,6 +864,9 @@ void phy_state_machine(struct work_struct *work)
needs_aneg = true;
break;
case PHY_NOLINK:
+ if (phy_interrupt_is_valid(phydev))
+ break;
+
err = phy_read_status(phydev);
if (err)
break;
diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c
index 76cad712ddb2..dd295dbaa074 100644
--- a/drivers/net/phy/vitesse.c
+++ b/drivers/net/phy/vitesse.c
@@ -66,6 +66,7 @@
#define PHY_ID_VSC8244 0x000fc6c0
#define PHY_ID_VSC8514 0x00070670
#define PHY_ID_VSC8574 0x000704a0
+#define PHY_ID_VSC8601 0x00070420
#define PHY_ID_VSC8662 0x00070660
#define PHY_ID_VSC8221 0x000fc550
#define PHY_ID_VSC8211 0x000fc4b0
@@ -133,7 +134,8 @@ static int vsc82xx_config_intr(struct phy_device *phydev)
(phydev->drv->phy_id == PHY_ID_VSC8234 ||
phydev->drv->phy_id == PHY_ID_VSC8244 ||
phydev->drv->phy_id == PHY_ID_VSC8514 ||
- phydev->drv->phy_id == PHY_ID_VSC8574) ?
+ phydev->drv->phy_id == PHY_ID_VSC8574 ||
+ phydev->drv->phy_id == PHY_ID_VSC8601) ?
MII_VSC8244_IMASK_MASK :
MII_VSC8221_IMASK_MASK);
else {
@@ -272,6 +274,18 @@ static struct phy_driver vsc82xx_driver[] = {
.config_intr = &vsc82xx_config_intr,
.driver = { .owner = THIS_MODULE,},
}, {
+ .phy_id = PHY_ID_VSC8601,
+ .name = "Vitesse VSC8601",
+ .phy_id_mask = 0x000ffff0,
+ .features = PHY_GBIT_FEATURES,
+ .flags = PHY_HAS_INTERRUPT,
+ .config_init = &genphy_config_init,
+ .config_aneg = &genphy_config_aneg,
+ .read_status = &genphy_read_status,
+ .ack_interrupt = &vsc824x_ack_interrupt,
+ .config_intr = &vsc82xx_config_intr,
+ .driver = { .owner = THIS_MODULE,},
+}, {
.phy_id = PHY_ID_VSC8662,
.name = "Vitesse VSC8662",
.phy_id_mask = 0x000ffff0,
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 5e0b43283bce..0a37f840fcc5 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -568,6 +568,9 @@ static int pppoe_create(struct net *net, struct socket *sock, int kern)
sk->sk_family = PF_PPPOX;
sk->sk_protocol = PX_PROTO_OE;
+ INIT_WORK(&pppox_sk(sk)->proto.pppoe.padt_work,
+ pppoe_unbind_sock_work);
+
return 0;
}
@@ -632,8 +635,6 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
lock_sock(sk);
- INIT_WORK(&po->proto.pppoe.padt_work, pppoe_unbind_sock_work);
-
error = -EINVAL;
if (sp->sa_protocol != PX_PROTO_OE)
goto end;
@@ -663,8 +664,13 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
po->pppoe_dev = NULL;
}
- memset(sk_pppox(po) + 1, 0,
- sizeof(struct pppox_sock) - sizeof(struct sock));
+ po->pppoe_ifindex = 0;
+ memset(&po->pppoe_pa, 0, sizeof(po->pppoe_pa));
+ memset(&po->pppoe_relay, 0, sizeof(po->pppoe_relay));
+ memset(&po->chan, 0, sizeof(po->chan));
+ po->next = NULL;
+ po->num = 0;
+
sk->sk_state = PPPOX_NONE;
}
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index fc69e41d0950..597c53e0a2ec 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -419,6 +419,9 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr,
struct pptp_opt *opt = &po->proto.pptp;
int error = 0;
+ if (sockaddr_len < sizeof(struct sockaddr_pppox))
+ return -EINVAL;
+
lock_sock(sk);
opt->src_addr = sp->sa_addr.pptp;
@@ -440,6 +443,9 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr,
struct flowi4 fl4;
int error = 0;
+ if (sockaddr_len < sizeof(struct sockaddr_pppox))
+ return -EINVAL;
+
if (sp->sa_protocol != PX_PROTO_PPTP)
return -EINVAL;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b1878faea397..f0db770e8b2f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1040,7 +1040,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
mask |= POLLIN | POLLRDNORM;
if (sock_writeable(sk) ||
- (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
+ (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
sock_writeable(sk)))
mask |= POLLOUT | POLLWRNORM;
@@ -1488,7 +1488,7 @@ static void tun_sock_write_space(struct sock *sk)
if (!sock_writeable(sk))
return;
- if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
+ if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags))
return;
wqueue = sk_sleep(sk);
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index c78d3cb1b464..3da70bf9936a 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -696,6 +696,11 @@ static const struct usb_device_id products[] = {
USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
.driver_info = (kernel_ulong_t) &wwan_info,
}, {
+ /* Dell DW5580 modules */
+ USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x81ba, USB_CLASS_COMM,
+ USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
+ .driver_info = (kernel_ulong_t)&wwan_info,
+}, {
USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET,
USB_CDC_PROTO_NONE),
.driver_info = (unsigned long) &cdc_info,
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index bbde9884ab8a..bdd83d95ec0a 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -100,7 +100,7 @@ static const struct net_device_ops cdc_mbim_netdev_ops = {
.ndo_stop = usbnet_stop,
.ndo_start_xmit = usbnet_start_xmit,
.ndo_tx_timeout = usbnet_tx_timeout,
- .ndo_change_mtu = usbnet_change_mtu,
+ .ndo_change_mtu = cdc_ncm_change_mtu,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_vlan_rx_add_vid = cdc_mbim_rx_add_vid,
@@ -158,7 +158,7 @@ static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf)
if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting))
goto err;
- ret = cdc_ncm_bind_common(dev, intf, data_altsetting, 0);
+ ret = cdc_ncm_bind_common(dev, intf, data_altsetting, dev->driver_info->data);
if (ret)
goto err;
@@ -582,6 +582,26 @@ static const struct driver_info cdc_mbim_info_zlp = {
.tx_fixup = cdc_mbim_tx_fixup,
};
+/* The specification explicitly allows NDPs to be placed anywhere in the
+ * frame, but some devices fail unless the NDP is placed after the IP
+ * packets. Use the CDC_NCM_FLAG_NDP_TO_END flag to force this
+ * behaviour.
+ *
+ * Note: The current implementation of this feature restricts each NTB
+ * to a single NDP, implying that multiplexed sessions cannot share an
+ * NTB. This might affect performance for multiplexed sessions.

+ */
+static const struct driver_info cdc_mbim_info_ndp_to_end = {
+ .description = "CDC MBIM",
+ .flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN,
+ .bind = cdc_mbim_bind,
+ .unbind = cdc_mbim_unbind,
+ .manage_power = cdc_mbim_manage_power,
+ .rx_fixup = cdc_mbim_rx_fixup,
+ .tx_fixup = cdc_mbim_tx_fixup,
+ .data = CDC_NCM_FLAG_NDP_TO_END,
+};
+
static const struct usb_device_id mbim_devs[] = {
/* This duplicate NCM entry is intentional. MBIM devices can
* be disguised as NCM by default, and this is necessary to
@@ -597,6 +617,10 @@ static const struct usb_device_id mbim_devs[] = {
{ USB_VENDOR_AND_INTERFACE_INFO(0x0bdb, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
.driver_info = (unsigned long)&cdc_mbim_info,
},
+ /* Huawei E3372 fails unless NDP comes after the IP packets */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x12d1, 0x157d, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
+ .driver_info = (unsigned long)&cdc_mbim_info_ndp_to_end,
+ },
/* default entry */
{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
.driver_info = (unsigned long)&cdc_mbim_info_zlp,
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index a187f08113ec..e8a1144c5a8b 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -41,6 +41,7 @@
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/ctype.h>
+#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/workqueue.h>
#include <linux/mii.h>
@@ -689,9 +690,35 @@ static void cdc_ncm_free(struct cdc_ncm_ctx *ctx)
kfree(ctx);
}
+/* we need to override the usbnet change_mtu ndo for two reasons:
+ * - respect the negotiated maximum datagram size
+ * - avoid unwanted changes to rx and tx buffers
+ */
+int cdc_ncm_change_mtu(struct net_device *net, int new_mtu)
+{
+ struct usbnet *dev = netdev_priv(net);
+ struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+ int maxmtu = ctx->max_datagram_size - cdc_ncm_eth_hlen(dev);
+
+ if (new_mtu <= 0 || new_mtu > maxmtu)
+ return -EINVAL;
+ net->mtu = new_mtu;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cdc_ncm_change_mtu);
+
+static const struct net_device_ops cdc_ncm_netdev_ops = {
+ .ndo_open = usbnet_open,
+ .ndo_stop = usbnet_stop,
+ .ndo_start_xmit = usbnet_start_xmit,
+ .ndo_tx_timeout = usbnet_tx_timeout,
+ .ndo_change_mtu = cdc_ncm_change_mtu,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+};
+
int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags)
{
- const struct usb_cdc_union_desc *union_desc = NULL;
struct cdc_ncm_ctx *ctx;
struct usb_driver *driver;
u8 *buf;
@@ -725,15 +752,16 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
/* parse through descriptors associated with control interface */
cdc_parse_cdc_header(&hdr, intf, buf, len);
- ctx->data = usb_ifnum_to_if(dev->udev,
- hdr.usb_cdc_union_desc->bSlaveInterface0);
+ if (hdr.usb_cdc_union_desc)
+ ctx->data = usb_ifnum_to_if(dev->udev,
+ hdr.usb_cdc_union_desc->bSlaveInterface0);
ctx->ether_desc = hdr.usb_cdc_ether_desc;
ctx->func_desc = hdr.usb_cdc_ncm_desc;
ctx->mbim_desc = hdr.usb_cdc_mbim_desc;
ctx->mbim_extended_desc = hdr.usb_cdc_mbim_extended_desc;
/* some buggy devices have an IAD but no CDC Union */
- if (!union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) {
+ if (!hdr.usb_cdc_union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) {
ctx->data = usb_ifnum_to_if(dev->udev, intf->cur_altsetting->desc.bInterfaceNumber + 1);
dev_dbg(&intf->dev, "CDC Union missing - got slave from IAD\n");
}
@@ -823,6 +851,9 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
/* add our sysfs attrs */
dev->net->sysfs_groups[0] = &cdc_ncm_sysfs_attr_group;
+ /* must handle MTU changes */
+ dev->net->netdev_ops = &cdc_ncm_netdev_ops;
+
return 0;
error2:
@@ -955,10 +986,18 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_
* NTH16 header as we would normally do. NDP isn't written to the SKB yet, and
* the wNdpIndex field in the header is actually not consistent with reality. It will be later.
*/
- if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)
+ if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) {
if (ctx->delayed_ndp16->dwSignature == sign)
return ctx->delayed_ndp16;
+ /* We can only push a single NDP to the end. Return
+ * NULL to send what we've already got and queue this
+ * skb for later.
+ */
+ else if (ctx->delayed_ndp16->dwSignature)
+ return NULL;
+ }
+
/* follow the chain of NDPs, looking for a match */
while (ndpoffset) {
ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb->data + ndpoffset);
@@ -1550,6 +1589,24 @@ static const struct usb_device_id cdc_devs[] = {
.driver_info = (unsigned long) &wwan_info,
},
+ /* DW5812 LTE Verizon Mobile Broadband Card
+ * Unlike DW5550 this device requires FLAG_NOARP
+ */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x413c, 0x81bb,
+ USB_CLASS_COMM,
+ USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE),
+ .driver_info = (unsigned long)&wwan_noarp_info,
+ },
+
+ /* DW5813 LTE AT&T Mobile Broadband Card
+ * Unlike DW5550 this device requires FLAG_NOARP
+ */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x413c, 0x81bc,
+ USB_CLASS_COMM,
+ USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE),
+ .driver_info = (unsigned long)&wwan_noarp_info,
+ },
+
/* Dell branded MBM devices like DW5550 */
{ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_VENDOR,
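
cdc_ncm_change_mtu() above rejects anything past the negotiated maximum datagram size minus the Ethernet header. The same bounds check in isolation (the sizes here are illustrative, not from a real device):

#include <stdio.h>

#define EINVAL            22
#define ETH_HLEN          14
#define MAX_DATAGRAM_SIZE 2048 /* assumed negotiated value */

static int change_mtu(int *mtu, int new_mtu)
{
	int maxmtu = MAX_DATAGRAM_SIZE - ETH_HLEN;

	if (new_mtu <= 0 || new_mtu > maxmtu)
		return -EINVAL;
	*mtu = new_mtu;
	return 0;
}

int main(void)
{
	int mtu = 1500;

	printf("9000 -> %d (mtu=%d)\n", change_mtu(&mtu, 9000), mtu);
	printf("2000 -> %d (mtu=%d)\n", change_mtu(&mtu, 2000), mtu);
	return 0;
}
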
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 34799eaace41..5fccc5a8153f 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -725,6 +725,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */
{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */
{QMI_FIXED_INTF(0x1bc7, 0x1201, 2)}, /* Telit LE920 */
+ {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */
{QMI_FIXED_INTF(0x0b3c, 0xc000, 4)}, /* Olivetti Olicard 100 */
{QMI_FIXED_INTF(0x0b3c, 0xc001, 4)}, /* Olivetti Olicard 120 */
{QMI_FIXED_INTF(0x0b3c, 0xc002, 4)}, /* Olivetti Olicard 140 */
@@ -741,6 +742,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x413c, 0x81a9, 8)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
{QMI_FIXED_INTF(0x413c, 0x81b1, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
+ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */
/* 4. Gobi 1000 devices */
{QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index d9427ca3dba7..2fb637ad594a 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3067,17 +3067,6 @@ static int rtl8152_open(struct net_device *netdev)
mutex_lock(&tp->control);
- /* The WORK_ENABLE may be set when autoresume occurs */
- if (test_bit(WORK_ENABLE, &tp->flags)) {
- clear_bit(WORK_ENABLE, &tp->flags);
- usb_kill_urb(tp->intr_urb);
- cancel_delayed_work_sync(&tp->schedule);
-
- /* disable the tx/rx, if the workqueue has enabled them. */
- if (netif_carrier_ok(netdev))
- tp->rtl_ops.disable(tp);
- }
-
tp->rtl_ops.up(tp);
rtl8152_set_speed(tp, AUTONEG_ENABLE,
@@ -3124,12 +3113,6 @@ static int rtl8152_close(struct net_device *netdev)
} else {
mutex_lock(&tp->control);
- /* The autosuspend may have been enabled and wouldn't
- * be disable when autoresume occurs, because the
- * netif_running() would be false.
- */
- rtl_runtime_suspend_enable(tp, false);
-
tp->rtl_ops.down(tp);
mutex_unlock(&tp->control);
@@ -3512,7 +3495,7 @@ static int rtl8152_resume(struct usb_interface *intf)
netif_device_attach(tp->netdev);
}
- if (netif_running(tp->netdev)) {
+ if (netif_running(tp->netdev) && tp->netdev->flags & IFF_UP) {
if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
rtl_runtime_suspend_enable(tp, false);
clear_bit(SELECTIVE_SUSPEND, &tp->flags);
@@ -3532,6 +3515,8 @@ static int rtl8152_resume(struct usb_interface *intf)
}
usb_submit_urb(tp->intr_urb, GFP_KERNEL);
} else if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
+ if (tp->netdev->flags & IFF_UP)
+ rtl_runtime_suspend_enable(tp, false);
clear_bit(SELECTIVE_SUSPEND, &tp->flags);
}
@@ -3540,6 +3525,14 @@ static int rtl8152_resume(struct usb_interface *intf)
return 0;
}
+static int rtl8152_reset_resume(struct usb_interface *intf)
+{
+ struct r8152 *tp = usb_get_intfdata(intf);
+
+ clear_bit(SELECTIVE_SUSPEND, &tp->flags);
+ return rtl8152_resume(intf);
+}
+
static void rtl8152_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
struct r8152 *tp = netdev_priv(dev);
@@ -4291,7 +4284,7 @@ static struct usb_driver rtl8152_driver = {
.disconnect = rtl8152_disconnect,
.suspend = rtl8152_suspend,
.resume = rtl8152_resume,
- .reset_resume = rtl8152_resume,
+ .reset_resume = rtl8152_reset_resume,
.pre_reset = rtl8152_pre_reset,
.post_reset = rtl8152_post_reset,
.supports_autosuspend = 1,
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 0ef4a5ad5557..ba21d072be31 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -117,12 +117,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
kfree_skb(skb);
goto drop;
}
- /* don't change ip_summed == CHECKSUM_PARTIAL, as that
- * will cause bad checksum on forwarded packets
- */
- if (skb->ip_summed == CHECKSUM_NONE &&
- rcv->features & NETIF_F_RXCSUM)
- skb->ip_summed = CHECKSUM_UNNECESSARY;
if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index d8838dedb7a4..f94ab786088f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -140,6 +140,12 @@ struct virtnet_info {
/* CPU hot plug notifier */
struct notifier_block nb;
+
+ /* Control VQ buffers: protected by the rtnl lock */
+ struct virtio_net_ctrl_hdr ctrl_hdr;
+ virtio_net_ctrl_ack ctrl_status;
+ u8 ctrl_promisc;
+ u8 ctrl_allmulti;
};
struct padded_vnet_hdr {
@@ -976,31 +982,30 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
struct scatterlist *out)
{
struct scatterlist *sgs[4], hdr, stat;
- struct virtio_net_ctrl_hdr ctrl;
- virtio_net_ctrl_ack status = ~0;
unsigned out_num = 0, tmp;
/* Caller should know better */
BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
- ctrl.class = class;
- ctrl.cmd = cmd;
+ vi->ctrl_status = ~0;
+ vi->ctrl_hdr.class = class;
+ vi->ctrl_hdr.cmd = cmd;
/* Add header */
- sg_init_one(&hdr, &ctrl, sizeof(ctrl));
+ sg_init_one(&hdr, &vi->ctrl_hdr, sizeof(vi->ctrl_hdr));
sgs[out_num++] = &hdr;
if (out)
sgs[out_num++] = out;
/* Add return status. */
- sg_init_one(&stat, &status, sizeof(status));
+ sg_init_one(&stat, &vi->ctrl_status, sizeof(vi->ctrl_status));
sgs[out_num] = &stat;
BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
if (unlikely(!virtqueue_kick(vi->cvq)))
- return status == VIRTIO_NET_OK;
+ return vi->ctrl_status == VIRTIO_NET_OK;
/* Spin for a response, the kick causes an ioport write, trapping
* into the hypervisor, so the request should be handled immediately.
@@ -1009,7 +1014,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
!virtqueue_is_broken(vi->cvq))
cpu_relax();
- return status == VIRTIO_NET_OK;
+ return vi->ctrl_status == VIRTIO_NET_OK;
}
static int virtnet_set_mac_address(struct net_device *dev, void *p)
@@ -1151,7 +1156,6 @@ static void virtnet_set_rx_mode(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
struct scatterlist sg[2];
- u8 promisc, allmulti;
struct virtio_net_ctrl_mac *mac_data;
struct netdev_hw_addr *ha;
int uc_count;
@@ -1163,22 +1167,22 @@ static void virtnet_set_rx_mode(struct net_device *dev)
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
return;
- promisc = ((dev->flags & IFF_PROMISC) != 0);
- allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
+ vi->ctrl_promisc = ((dev->flags & IFF_PROMISC) != 0);
+ vi->ctrl_allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
- sg_init_one(sg, &promisc, sizeof(promisc));
+ sg_init_one(sg, &vi->ctrl_promisc, sizeof(vi->ctrl_promisc));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
VIRTIO_NET_CTRL_RX_PROMISC, sg))
dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
- promisc ? "en" : "dis");
+ vi->ctrl_promisc ? "en" : "dis");
- sg_init_one(sg, &allmulti, sizeof(allmulti));
+ sg_init_one(sg, &vi->ctrl_allmulti, sizeof(vi->ctrl_allmulti));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
- allmulti ? "en" : "dis");
+ vi->ctrl_allmulti ? "en" : "dis");
uc_count = netdev_uc_count(dev);
mc_count = netdev_mc_count(dev);
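
The virtio_net change moves the control-command header and status byte from virtnet_send_command()'s stack frame into struct virtnet_info: buffers handed to the device through a scatterlist must stay valid and DMA-addressable, a guarantee a stack frame cannot give (notably with virtually mapped stacks). A reduced mock of the shape, not the virtio API:

#include <stdio.h>

struct ctrl_hdr { unsigned char class, cmd; };

struct vnet_info {
	/* command buffers live with the device, not on a stack frame */
	struct ctrl_hdr ctrl_hdr;
	unsigned char ctrl_status;
};

/* stand-in for posting the buffers to the device's control queue */
static void post_cmd(struct ctrl_hdr *hdr, unsigned char *status)
{
	printf("device sees class=%u cmd=%u\n", hdr->class, hdr->cmd);
	*status = 0; /* device writes the ack here, possibly much later */
}

static int send_command(struct vnet_info *vi, unsigned char class,
			unsigned char cmd)
{
	vi->ctrl_status = 0xff;
	vi->ctrl_hdr.class = class;
	vi->ctrl_hdr.cmd = cmd;
	post_cmd(&vi->ctrl_hdr, &vi->ctrl_status);
	return vi->ctrl_status == 0;
}

int main(void)
{
	struct vnet_info vi;

	printf("ok=%d\n", send_command(&vi, 1, 2));
	return 0;
}
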
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 46f4caddccbe..0cbf520cea77 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -587,6 +587,12 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
&adapter->pdev->dev,
rbi->skb->data, rbi->len,
PCI_DMA_FROMDEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev,
+ rbi->dma_addr)) {
+ dev_kfree_skb_any(rbi->skb);
+ rq->stats.rx_buf_alloc_failure++;
+ break;
+ }
} else {
/* rx buffer skipped by the device */
}
@@ -605,13 +611,18 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
&adapter->pdev->dev,
rbi->page, 0, PAGE_SIZE,
PCI_DMA_FROMDEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev,
+ rbi->dma_addr)) {
+ put_page(rbi->page);
+ rq->stats.rx_buf_alloc_failure++;
+ break;
+ }
} else {
/* rx buffers skipped by the device */
}
val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
}
- BUG_ON(rbi->dma_addr == 0);
gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
| val | rbi->len);
@@ -655,7 +666,7 @@ vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
}
-static void
+static int
vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
struct vmxnet3_adapter *adapter)
@@ -715,6 +726,8 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
skb->data + buf_offset, buf_size,
PCI_DMA_TODEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
+ return -EFAULT;
tbi->len = buf_size;
@@ -755,6 +768,8 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
buf_offset, buf_size,
DMA_TO_DEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
+ return -EFAULT;
tbi->len = buf_size;
@@ -782,6 +797,8 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
/* set the last buf_info for the pkt */
tbi->skb = skb;
tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
+
+ return 0;
}
@@ -1020,7 +1037,8 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
}
/* fill tx descs related to addr & len */
- vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter);
+ if (vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter))
+ goto unlock_drop_pkt;
/* setup the EOP desc */
ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
@@ -1231,6 +1249,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
struct vmxnet3_rx_buf_info *rbi;
struct sk_buff *skb, *new_skb = NULL;
struct page *new_page = NULL;
+ dma_addr_t new_dma_addr;
int num_to_alloc;
struct Vmxnet3_RxDesc *rxd;
u32 idx, ring_idx;
@@ -1287,6 +1306,21 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
skip_page_frags = true;
goto rcd_done;
}
+ new_dma_addr = dma_map_single(&adapter->pdev->dev,
+ new_skb->data, rbi->len,
+ PCI_DMA_FROMDEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev,
+ new_dma_addr)) {
+ dev_kfree_skb(new_skb);
+ /* Skb allocation failed, do not hand this
+ * skb over to the stack. Reuse the buffer. Drop the existing pkt.
+ */
+ rq->stats.rx_buf_alloc_failure++;
+ ctx->skb = NULL;
+ rq->stats.drop_total++;
+ skip_page_frags = true;
+ goto rcd_done;
+ }
dma_unmap_single(&adapter->pdev->dev, rbi->dma_addr,
rbi->len,
@@ -1303,9 +1337,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
/* Immediate refill */
rbi->skb = new_skb;
- rbi->dma_addr = dma_map_single(&adapter->pdev->dev,
- rbi->skb->data, rbi->len,
- PCI_DMA_FROMDEVICE);
+ rbi->dma_addr = new_dma_addr;
rxd->addr = cpu_to_le64(rbi->dma_addr);
rxd->len = rbi->len;
if (adapter->version == 2 &&
@@ -1348,6 +1380,19 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
skip_page_frags = true;
goto rcd_done;
}
+ new_dma_addr = dma_map_page(&adapter->pdev->dev,
+ new_page,
+ 0, PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev,
+ new_dma_addr)) {
+ put_page(new_page);
+ rq->stats.rx_buf_alloc_failure++;
+ dev_kfree_skb(ctx->skb);
+ ctx->skb = NULL;
+ skip_page_frags = true;
+ goto rcd_done;
+ }
dma_unmap_page(&adapter->pdev->dev,
rbi->dma_addr, rbi->len,
@@ -1357,10 +1402,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
/* Immediate refill */
rbi->page = new_page;
- rbi->dma_addr = dma_map_page(&adapter->pdev->dev
- , rbi->page,
- 0, PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
+ rbi->dma_addr = new_dma_addr;
rxd->addr = cpu_to_le64(rbi->dma_addr);
rxd->len = rbi->len;
}
@@ -2157,16 +2199,18 @@ vmxnet3_set_mc(struct net_device *netdev)
if (!netdev_mc_empty(netdev)) {
new_table = vmxnet3_copy_mc(netdev);
if (new_table) {
- rxConf->mfTableLen = cpu_to_le16(
- netdev_mc_count(netdev) * ETH_ALEN);
+ size_t sz = netdev_mc_count(netdev) * ETH_ALEN;
+
+ rxConf->mfTableLen = cpu_to_le16(sz);
new_table_pa = dma_map_single(
&adapter->pdev->dev,
new_table,
- rxConf->mfTableLen,
+ sz,
PCI_DMA_TODEVICE);
}
- if (new_table_pa) {
+ if (!dma_mapping_error(&adapter->pdev->dev,
+ new_table_pa)) {
new_mode |= VMXNET3_RXM_MCAST;
rxConf->mfTablePA = cpu_to_le64(new_table_pa);
} else {
@@ -3074,6 +3118,11 @@ vmxnet3_probe_device(struct pci_dev *pdev,
adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
sizeof(struct vmxnet3_adapter),
PCI_DMA_TODEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) {
+ dev_err(&pdev->dev, "Failed to map dma\n");
+ err = -EFAULT;
+ goto err_dma_map;
+ }
adapter->shared = dma_alloc_coherent(
&adapter->pdev->dev,
sizeof(struct Vmxnet3_DriverShared),
@@ -3232,6 +3281,7 @@ err_alloc_queue_desc:
err_alloc_shared:
dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
+err_dma_map:
free_netdev(netdev);
return err;
}
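
Every vmxnet3 hunk above applies the same rule: a streaming DMA mapping can fail, so the cookie must be checked with dma_mapping_error() before use. The removed BUG_ON(rbi->dma_addr == 0) was wrong both ways, since 0 can be a legal DMA address and a failed mapping need not be 0. A mock of the check-and-recover shape:

#include <stdio.h>

typedef unsigned long dma_addr_t;
#define DMA_ERROR ((dma_addr_t)~0ul) /* illustrative error cookie */

static dma_addr_t mock_map(int fail) { return fail ? DMA_ERROR : 0x1000; }
static int mock_mapping_error(dma_addr_t a) { return a == DMA_ERROR; }

static int refill_rx_buf(int fail)
{
	dma_addr_t addr = mock_map(fail);

	if (mock_mapping_error(addr)) {
		/* drop this buffer, count the failure, keep the ring sane */
		puts("map failed: rx_buf_alloc_failure++");
		return -1;
	}
	printf("mapped at %#lx\n", addr);
	return 0;
}

int main(void)
{
	refill_rx_buf(0);
	refill_rx_buf(1);
	return 0;
}
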
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 3f859a55c035..bdb8a6c0f8aa 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,10 +69,10 @@
/*
* Version numbers
*/
-#define VMXNET3_DRIVER_VERSION_STRING "1.4.3.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING "1.4.5.0-k"
/* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM 0x01040300
+#define VMXNET3_DRIVER_VERSION_NUM 0x01040500
#if defined(CONFIG_PCI_MSI)
/* RSS only makes sense if MSI-X is supported. */
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 92fa3e1ea65c..0a242b200df4 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -800,7 +800,7 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev,
}
/* called under rcu_read_lock */
-static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
+static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
{
struct fib_result res = { .tclassid = 0 };
struct net *net = dev_net(dev);
@@ -808,9 +808,10 @@ static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
u8 flags = fl4->flowi4_flags;
u8 scope = fl4->flowi4_scope;
u8 tos = RT_FL_TOS(fl4);
+ int rc;
if (unlikely(!fl4->daddr))
- return;
+ return 0;
fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
@@ -818,7 +819,8 @@ static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
- if (!fib_lookup(net, fl4, &res, 0)) {
+ rc = fib_lookup(net, fl4, &res, 0);
+ if (!rc) {
if (res.type == RTN_LOCAL)
fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr;
else
@@ -828,6 +830,8 @@ static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
fl4->flowi4_flags = flags;
fl4->flowi4_tos = orig_tos;
fl4->flowi4_scope = scope;
+
+ return rc;
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -907,7 +911,6 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct net_vrf *vrf = netdev_priv(dev);
- int err;
if (!data || !data[IFLA_VRF_TABLE])
return -EINVAL;
@@ -916,15 +919,7 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
dev->priv_flags |= IFF_L3MDEV_MASTER;
- err = register_netdevice(dev);
- if (err < 0)
- goto out_fail;
-
- return 0;
-
-out_fail:
- free_netdev(dev);
- return err;
+ return register_netdevice(dev);
}
static size_t vrf_nl_getsize(const struct net_device *dev)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 6369a5734d4c..ba363cedef80 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1158,7 +1158,6 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
struct pcpu_sw_netstats *stats;
union vxlan_addr saddr;
int err = 0;
- union vxlan_addr *remote_ip;
/* For flow based devices, map all packets to VNI 0 */
if (vs->flags & VXLAN_F_COLLECT_METADATA)
@@ -1169,7 +1168,6 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
if (!vxlan)
goto drop;
- remote_ip = &vxlan->default_dst.remote_ip;
skb_reset_mac_header(skb);
skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
skb->protocol = eth_type_trans(skb, vxlan->dev);
@@ -1179,8 +1177,8 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
goto drop;
- /* Re-examine inner Ethernet packet */
- if (remote_ip->sa.sa_family == AF_INET) {
+ /* Get data from the outer IP header */
+ if (vxlan_get_sk_family(vs) == AF_INET) {
oip = ip_hdr(skb);
saddr.sin.sin_addr.s_addr = oip->saddr;
saddr.sa.sa_family = AF_INET;
@@ -1848,6 +1846,34 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk
!(vxflags & VXLAN_F_UDP_CSUM));
}
+#if IS_ENABLED(CONFIG_IPV6)
+static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
+ struct sk_buff *skb, int oif,
+ const struct in6_addr *daddr,
+ struct in6_addr *saddr)
+{
+ struct dst_entry *ndst;
+ struct flowi6 fl6;
+ int err;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = oif;
+ fl6.daddr = *daddr;
+ fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = IPPROTO_UDP;
+
+ err = ipv6_stub->ipv6_dst_lookup(vxlan->net,
+ vxlan->vn6_sock->sock->sk,
+ &ndst, &fl6);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ *saddr = fl6.saddr;
+ return ndst;
+}
+#endif
+
/* Bypass encapsulation if the destination is local */
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
struct vxlan_dev *dst_vxlan)
@@ -2035,21 +2061,17 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct dst_entry *ndst;
- struct flowi6 fl6;
+ struct in6_addr saddr;
u32 rt6i_flags;
if (!vxlan->vn6_sock)
goto drop;
sk = vxlan->vn6_sock->sock->sk;
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_oif = rdst ? rdst->remote_ifindex : 0;
- fl6.daddr = dst->sin6.sin6_addr;
- fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
- fl6.flowi6_mark = skb->mark;
- fl6.flowi6_proto = IPPROTO_UDP;
-
- if (ipv6_stub->ipv6_dst_lookup(vxlan->net, sk, &ndst, &fl6)) {
+ ndst = vxlan6_get_route(vxlan, skb,
+ rdst ? rdst->remote_ifindex : 0,
+ &dst->sin6.sin6_addr, &saddr);
+ if (IS_ERR(ndst)) {
netdev_dbg(dev, "no route to %pI6\n",
&dst->sin6.sin6_addr);
dev->stats.tx_carrier_errors++;
@@ -2081,7 +2103,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}
ttl = ttl ? : ip6_dst_hoplimit(ndst);
- err = vxlan6_xmit_skb(ndst, sk, skb, dev, &fl6.saddr, &fl6.daddr,
+ err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr,
0, ttl, src_port, dst_port, htonl(vni << 8), md,
!net_eq(vxlan->net, dev_net(vxlan->dev)),
flags);
@@ -2395,9 +2417,30 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
vxlan->cfg.port_max, true);
dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
- if (ip_tunnel_info_af(info) == AF_INET)
+ if (ip_tunnel_info_af(info) == AF_INET) {
+ if (!vxlan->vn4_sock)
+ return -EINVAL;
return egress_ipv4_tun_info(dev, skb, info, sport, dport);
- return -EINVAL;
+ } else {
+#if IS_ENABLED(CONFIG_IPV6)
+ struct dst_entry *ndst;
+
+ if (!vxlan->vn6_sock)
+ return -EINVAL;
+ ndst = vxlan6_get_route(vxlan, skb, 0,
+ &info->key.u.ipv6.dst,
+ &info->key.u.ipv6.src);
+ if (IS_ERR(ndst))
+ return PTR_ERR(ndst);
+ dst_release(ndst);
+
+ info->key.tp_src = sport;
+ info->key.tp_dst = dport;
+#else /* !CONFIG_IPV6 */
+ return -EPFNOSUPPORT;
+#endif
+ }
+ return 0;
}
static const struct net_device_ops vxlan_netdev_ops = {
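The vxlan hunks above fold the IPv6 route lookup into a single vxlan6_get_route() helper shared by the transmit and metadata-dst paths, and report failure through the kernel's ERR_PTR convention instead of a separate error out-parameter. A minimal sketch of that convention follows; the helper and lookup names are hypothetical, not the driver's own:

    #include <linux/err.h>

    /* Hypothetical route helper: a failure is encoded into the returned
     * pointer with ERR_PTR(), so callers need only one return value.
     */
    static struct dst_entry *example_get_route(struct net *net, int oif)
    {
            struct dst_entry *dst = example_lookup(net, oif); /* assumed primitive */

            if (!dst)
                    return ERR_PTR(-ENETUNREACH);
            return dst;
    }

    static int example_caller(struct net *net, int oif)
    {
            struct dst_entry *ndst = example_get_route(net, oif);

            if (IS_ERR(ndst))
                    return PTR_ERR(ndst);   /* recover the errno from the pointer */
            dst_release(ndst);              /* drop the route reference when done */
            return 0;
    }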
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index e92aaf615901..89541cc90e87 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1075,11 +1075,10 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
used = pvc_is_used(pvc);
- if (type == ARPHRD_ETHER) {
+ if (type == ARPHRD_ETHER)
dev = alloc_netdev(0, "pvceth%d", NET_NAME_UNKNOWN,
ether_setup);
- dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- } else
+ else
dev = alloc_netdev(0, "pvc%d", NET_NAME_UNKNOWN, pvc_setup);
if (!dev) {
@@ -1088,9 +1087,10 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
return -ENOBUFS;
}
- if (type == ARPHRD_ETHER)
+ if (type == ARPHRD_ETHER) {
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
eth_hw_addr_random(dev);
- else {
+ } else {
*(__be16*)dev->dev_addr = htons(dlci);
dlci_to_q922(dev->broadcast, dlci);
}
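The fr_add_pvc() reordering above is a NULL-dereference fix: the old code cleared IFF_TX_SKB_SHARING on dev between alloc_netdev() and the !dev check, so an allocation failure oopsed before it could be reported. The safe ordering, sketched with the same calls:

    dev = alloc_netdev(0, "pvceth%d", NET_NAME_UNKNOWN, ether_setup);
    if (!dev)
            return -ENOBUFS;        /* report failure before touching dev */
    /* Only configure the device once the allocation is known good. */
    dev->priv_flags &= ~IFF_TX_SKB_SHARING;
    eth_hw_addr_random(dev);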
diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c
index 5c47b011a9d7..cd39025d2abf 100644
--- a/drivers/net/wan/x25_asy.c
+++ b/drivers/net/wan/x25_asy.c
@@ -549,16 +549,12 @@ static void x25_asy_receive_buf(struct tty_struct *tty,
static int x25_asy_open_tty(struct tty_struct *tty)
{
- struct x25_asy *sl = tty->disc_data;
+ struct x25_asy *sl;
int err;
if (tty->ops->write == NULL)
return -EOPNOTSUPP;
- /* First make sure we're not already connected. */
- if (sl && sl->magic == X25_ASY_MAGIC)
- return -EEXIST;
-
/* OK. Find a free X.25 channel to use. */
sl = x25_asy_alloc();
if (sl == NULL)
diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index aa9bd92ac4ed..0947cc271e69 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -51,6 +51,7 @@ MODULE_PARM_DESC(rawmode, "Use raw 802.11 frame datapath");
static const struct ath10k_hw_params ath10k_hw_params_list[] = {
{
.id = QCA988X_HW_2_0_VERSION,
+ .dev_id = QCA988X_2_0_DEVICE_ID,
.name = "qca988x hw2.0",
.patch_load_addr = QCA988X_HW_2_0_PATCH_LOAD_ADDR,
.uart_pin = 7,
@@ -69,6 +70,25 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
},
{
.id = QCA6174_HW_2_1_VERSION,
+ .dev_id = QCA6164_2_1_DEVICE_ID,
+ .name = "qca6164 hw2.1",
+ .patch_load_addr = QCA6174_HW_2_1_PATCH_LOAD_ADDR,
+ .uart_pin = 6,
+ .otp_exe_param = 0,
+ .channel_counters_freq_hz = 88000,
+ .max_probe_resp_desc_thres = 0,
+ .fw = {
+ .dir = QCA6174_HW_2_1_FW_DIR,
+ .fw = QCA6174_HW_2_1_FW_FILE,
+ .otp = QCA6174_HW_2_1_OTP_FILE,
+ .board = QCA6174_HW_2_1_BOARD_DATA_FILE,
+ .board_size = QCA6174_BOARD_DATA_SZ,
+ .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ,
+ },
+ },
+ {
+ .id = QCA6174_HW_2_1_VERSION,
+ .dev_id = QCA6174_2_1_DEVICE_ID,
.name = "qca6174 hw2.1",
.patch_load_addr = QCA6174_HW_2_1_PATCH_LOAD_ADDR,
.uart_pin = 6,
@@ -86,6 +106,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
},
{
.id = QCA6174_HW_3_0_VERSION,
+ .dev_id = QCA6174_2_1_DEVICE_ID,
.name = "qca6174 hw3.0",
.patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR,
.uart_pin = 6,
@@ -103,6 +124,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
},
{
.id = QCA6174_HW_3_2_VERSION,
+ .dev_id = QCA6174_2_1_DEVICE_ID,
.name = "qca6174 hw3.2",
.patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR,
.uart_pin = 6,
@@ -121,6 +143,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
},
{
.id = QCA99X0_HW_2_0_DEV_VERSION,
+ .dev_id = QCA99X0_2_0_DEVICE_ID,
.name = "qca99x0 hw2.0",
.patch_load_addr = QCA99X0_HW_2_0_PATCH_LOAD_ADDR,
.uart_pin = 7,
@@ -139,10 +162,31 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
},
{
.id = QCA9377_HW_1_0_DEV_VERSION,
+ .dev_id = QCA9377_1_0_DEVICE_ID,
.name = "qca9377 hw1.0",
.patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR,
- .uart_pin = 7,
+ .uart_pin = 6,
.otp_exe_param = 0,
+ .channel_counters_freq_hz = 88000,
+ .max_probe_resp_desc_thres = 0,
+ .fw = {
+ .dir = QCA9377_HW_1_0_FW_DIR,
+ .fw = QCA9377_HW_1_0_FW_FILE,
+ .otp = QCA9377_HW_1_0_OTP_FILE,
+ .board = QCA9377_HW_1_0_BOARD_DATA_FILE,
+ .board_size = QCA9377_BOARD_DATA_SZ,
+ .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ,
+ },
+ },
+ {
+ .id = QCA9377_HW_1_1_DEV_VERSION,
+ .dev_id = QCA9377_1_0_DEVICE_ID,
+ .name = "qca9377 hw1.1",
+ .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR,
+ .uart_pin = 6,
+ .otp_exe_param = 0,
+ .channel_counters_freq_hz = 88000,
+ .max_probe_resp_desc_thres = 0,
.fw = {
.dir = QCA9377_HW_1_0_FW_DIR,
.fw = QCA9377_HW_1_0_FW_FILE,
@@ -1263,7 +1307,8 @@ static int ath10k_init_hw_params(struct ath10k *ar)
for (i = 0; i < ARRAY_SIZE(ath10k_hw_params_list); i++) {
hw_params = &ath10k_hw_params_list[i];
- if (hw_params->id == ar->target_version)
+ if (hw_params->id == ar->target_version &&
+ hw_params->dev_id == ar->dev_id)
break;
}
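The hw-params lookup change above exists because two chips can share one BMI target version: QCA6164 and QCA6174 both report the same QCA6174_HW_2_1_VERSION, so the table is now keyed on the (target_version, dev_id) pair. A minimal sketch of that two-key lookup, with hypothetical version values:

    struct hw_entry {
            u32 version;    /* BMI target version (may be shared)     */
            u16 dev_id;     /* PCI device ID (disambiguates variants) */
            const char *name;
    };

    static const struct hw_entry table[] = {
            { 0x05010000, 0x0041, "qca6164 hw2.1" },        /* hypothetical version */
            { 0x05010000, 0x003e, "qca6174 hw2.1" },
    };

    static const struct hw_entry *find_entry(u32 version, u16 dev_id)
    {
            size_t i;

            for (i = 0; i < ARRAY_SIZE(table); i++)
                    if (table[i].version == version && table[i].dev_id == dev_id)
                            return &table[i];
            return NULL;    /* no (version, dev_id) pair matched */
    }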
diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 018c64f4fd25..858d75f49a9f 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -636,6 +636,7 @@ struct ath10k {
struct ath10k_hw_params {
u32 id;
+ u16 dev_id;
const char *name;
u32 patch_load_addr;
int uart_pin;
diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h
index 39966a05c1cc..713c2bcea178 100644
--- a/drivers/net/wireless/ath/ath10k/hw.h
+++ b/drivers/net/wireless/ath/ath10k/hw.h
@@ -22,6 +22,12 @@
#define ATH10K_FW_DIR "ath10k"
+#define QCA988X_2_0_DEVICE_ID (0x003c)
+#define QCA6164_2_1_DEVICE_ID (0x0041)
+#define QCA6174_2_1_DEVICE_ID (0x003e)
+#define QCA99X0_2_0_DEVICE_ID (0x0040)
+#define QCA9377_1_0_DEVICE_ID (0x0042)
+
/* QCA988X 1.0 definitions (unsupported) */
#define QCA988X_HW_1_0_CHIP_ID_REV 0x0
@@ -42,6 +48,10 @@
#define QCA6174_HW_3_0_VERSION 0x05020000
#define QCA6174_HW_3_2_VERSION 0x05030000
+/* QCA9377 target BMI version signatures */
+#define QCA9377_HW_1_0_DEV_VERSION 0x05020000
+#define QCA9377_HW_1_1_DEV_VERSION 0x05020001
+
enum qca6174_pci_rev {
QCA6174_PCI_REV_1_1 = 0x11,
QCA6174_PCI_REV_1_3 = 0x13,
@@ -60,6 +70,11 @@ enum qca6174_chip_id_rev {
QCA6174_HW_3_2_CHIP_ID_REV = 10,
};
+enum qca9377_chip_id_rev {
+ QCA9377_HW_1_0_CHIP_ID_REV = 0x0,
+ QCA9377_HW_1_1_CHIP_ID_REV = 0x1,
+};
+
#define QCA6174_HW_2_1_FW_DIR "ath10k/QCA6174/hw2.1"
#define QCA6174_HW_2_1_FW_FILE "firmware.bin"
#define QCA6174_HW_2_1_OTP_FILE "otp.bin"
@@ -85,8 +100,6 @@ enum qca6174_chip_id_rev {
#define QCA99X0_HW_2_0_PATCH_LOAD_ADDR 0x1234
/* QCA9377 1.0 definitions */
-#define QCA9377_HW_1_0_DEV_VERSION 0x05020001
-#define QCA9377_HW_1_0_CHIP_ID_REV 0x1
#define QCA9377_HW_1_0_FW_DIR ATH10K_FW_DIR "/QCA9377/hw1.0"
#define QCA9377_HW_1_0_FW_FILE "firmware.bin"
#define QCA9377_HW_1_0_OTP_FILE "otp.bin"
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index a7411fe90cc4..95a55405ebf0 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -4225,7 +4225,7 @@ static int ath10k_config(struct ieee80211_hw *hw, u32 changed)
static u32 get_nss_from_chainmask(u16 chain_mask)
{
- if ((chain_mask & 0x15) == 0x15)
+ if ((chain_mask & 0xf) == 0xf)
return 4;
else if ((chain_mask & 0x7) == 0x7)
return 3;
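The chainmask fix above corrects a mask typo: 0x15 is binary 10101, which tests chains 0, 2 and 4 rather than the four contiguous chain bits, so a 4-chain mask of 0xf could never satisfy the old test and was reported as 3 spatial streams. Compile-time checks one could add to pin this down:

    /* 0xf & 0x15 == 0x5, so the old 4-stream test never fired for a
     * 4-chain radio; the contiguous mask 0xf does.
     */
    BUILD_BUG_ON((0xf & 0x15) == 0x15);     /* old test: never true for 0xf */
    BUILD_BUG_ON((0xf & 0xf) != 0xf);       /* fixed test: always true      */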
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 3fca200b986c..930785a724e1 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -57,12 +57,6 @@ MODULE_PARM_DESC(reset_mode, "0: auto, 1: warm only (default: 0)");
#define ATH10K_PCI_TARGET_WAIT 3000
#define ATH10K_PCI_NUM_WARM_RESET_ATTEMPTS 3
-#define QCA988X_2_0_DEVICE_ID (0x003c)
-#define QCA6164_2_1_DEVICE_ID (0x0041)
-#define QCA6174_2_1_DEVICE_ID (0x003e)
-#define QCA99X0_2_0_DEVICE_ID (0x0040)
-#define QCA9377_1_0_DEVICE_ID (0x0042)
-
static const struct pci_device_id ath10k_pci_id_table[] = {
{ PCI_VDEVICE(ATHEROS, QCA988X_2_0_DEVICE_ID) }, /* PCI-E QCA988X V2 */
{ PCI_VDEVICE(ATHEROS, QCA6164_2_1_DEVICE_ID) }, /* PCI-E QCA6164 V2.1 */
@@ -92,7 +86,9 @@ static const struct ath10k_pci_supp_chip ath10k_pci_supp_chips[] = {
{ QCA6174_2_1_DEVICE_ID, QCA6174_HW_3_2_CHIP_ID_REV },
{ QCA99X0_2_0_DEVICE_ID, QCA99X0_HW_2_0_CHIP_ID_REV },
+
{ QCA9377_1_0_DEVICE_ID, QCA9377_HW_1_0_CHIP_ID_REV },
+ { QCA9377_1_0_DEVICE_ID, QCA9377_HW_1_1_CHIP_ID_REV },
};
static void ath10k_pci_buffer_cleanup(struct ath10k *ar);
@@ -111,8 +107,9 @@ static void ath10k_pci_htc_tx_cb(struct ath10k_ce_pipe *ce_state);
static void ath10k_pci_htc_rx_cb(struct ath10k_ce_pipe *ce_state);
static void ath10k_pci_htt_tx_cb(struct ath10k_ce_pipe *ce_state);
static void ath10k_pci_htt_rx_cb(struct ath10k_ce_pipe *ce_state);
+static void ath10k_pci_htt_htc_rx_cb(struct ath10k_ce_pipe *ce_state);
-static const struct ce_attr host_ce_config_wlan[] = {
+static struct ce_attr host_ce_config_wlan[] = {
/* CE0: host->target HTC control and raw streams */
{
.flags = CE_ATTR_FLAGS,
@@ -128,7 +125,7 @@ static const struct ce_attr host_ce_config_wlan[] = {
.src_nentries = 0,
.src_sz_max = 2048,
.dest_nentries = 512,
- .recv_cb = ath10k_pci_htc_rx_cb,
+ .recv_cb = ath10k_pci_htt_htc_rx_cb,
},
/* CE2: target->host WMI */
@@ -217,7 +214,7 @@ static const struct ce_attr host_ce_config_wlan[] = {
};
/* Target firmware's Copy Engine configuration. */
-static const struct ce_pipe_config target_ce_config_wlan[] = {
+static struct ce_pipe_config target_ce_config_wlan[] = {
/* CE0: host->target HTC control and raw streams */
{
.pipenum = __cpu_to_le32(0),
@@ -330,7 +327,7 @@ static const struct ce_pipe_config target_ce_config_wlan[] = {
* This table is derived from the CE_PCI TABLE, above.
* It is passed to the Target at startup for use by firmware.
*/
-static const struct service_to_pipe target_service_to_ce_map_wlan[] = {
+static struct service_to_pipe target_service_to_ce_map_wlan[] = {
{
__cpu_to_le32(ATH10K_HTC_SVC_ID_WMI_DATA_VO),
__cpu_to_le32(PIPEDIR_OUT), /* out = UL = host -> target */
@@ -1208,6 +1205,16 @@ static void ath10k_pci_htc_rx_cb(struct ath10k_ce_pipe *ce_state)
ath10k_pci_process_rx_cb(ce_state, ath10k_htc_rx_completion_handler);
}
+static void ath10k_pci_htt_htc_rx_cb(struct ath10k_ce_pipe *ce_state)
+{
+	/* CE4 polling needs to be done whenever the CE pipe which transports
+ * HTT Rx (target->host) is processed.
+ */
+ ath10k_ce_per_engine_service(ce_state->ar, 4);
+
+ ath10k_pci_process_rx_cb(ce_state, ath10k_htc_rx_completion_handler);
+}
+
/* Called by lower (CE) layer when a send to HTT Target completes. */
static void ath10k_pci_htt_tx_cb(struct ath10k_ce_pipe *ce_state)
{
@@ -2027,6 +2034,29 @@ static int ath10k_pci_init_config(struct ath10k *ar)
return 0;
}
+static void ath10k_pci_override_ce_config(struct ath10k *ar)
+{
+ struct ce_attr *attr;
+ struct ce_pipe_config *config;
+
+ /* For QCA6174 we're overriding the Copy Engine 5 configuration,
+	 * since it is currently used for another feature.
+ */
+
+ /* Override Host's Copy Engine 5 configuration */
+ attr = &host_ce_config_wlan[5];
+ attr->src_sz_max = 0;
+ attr->dest_nentries = 0;
+
+ /* Override Target firmware's Copy Engine configuration */
+ config = &target_ce_config_wlan[5];
+ config->pipedir = __cpu_to_le32(PIPEDIR_OUT);
+ config->nbytes_max = __cpu_to_le32(2048);
+
+ /* Map from service/endpoint to Copy Engine */
+ target_service_to_ce_map_wlan[15].pipenum = __cpu_to_le32(1);
+}
+
static int ath10k_pci_alloc_pipes(struct ath10k *ar)
{
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
@@ -3020,6 +3050,9 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
goto err_core_destroy;
}
+ if (QCA_REV_6174(ar))
+ ath10k_pci_override_ce_config(ar);
+
ret = ath10k_pci_alloc_pipes(ar);
if (ret) {
ath10k_err(ar, "failed to allocate copy engine pipes: %d\n",
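Dropping const from the three CE tables above is what makes ath10k_pci_override_ce_config() possible: the QCA6174 probe path rewrites entry 5 in place before the pipes are allocated. The trade-off is that a module-scope table shared by every bound device is now mutated at probe time, sketched here with hypothetical fields:

    /* No longer const: the probe path may rewrite one entry in place.
     * The table is shared by all devices this driver binds, so the
     * override must be acceptable for every subsequent probe as well.
     */
    static struct ce_attr host_ce_config[12];

    static void override_ce5(void)
    {
            host_ce_config[5].src_sz_max = 0;       /* CE5 repurposed: no TX */
            host_ce_config[5].dest_nentries = 0;    /* ... and no RX ring    */
    }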
diff --git a/drivers/net/wireless/iwlwifi/iwl-7000.c b/drivers/net/wireless/iwlwifi/iwl-7000.c
index 1a73c7a1da77..d9a4aee246a6 100644
--- a/drivers/net/wireless/iwlwifi/iwl-7000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-7000.c
@@ -70,12 +70,18 @@
/* Highest firmware API version supported */
#define IWL7260_UCODE_API_MAX 17
+#define IWL7265_UCODE_API_MAX 19
+#define IWL7265D_UCODE_API_MAX 19
/* Oldest version we won't warn about */
#define IWL7260_UCODE_API_OK 13
+#define IWL7265_UCODE_API_OK 13
+#define IWL7265D_UCODE_API_OK 13
/* Lowest firmware API version supported */
#define IWL7260_UCODE_API_MIN 13
+#define IWL7265_UCODE_API_MIN 13
+#define IWL7265D_UCODE_API_MIN 13
/* NVM versions */
#define IWL7260_NVM_VERSION 0x0a1d
@@ -149,10 +155,7 @@ static const struct iwl_ht_params iwl7000_ht_params = {
.ht40_bands = BIT(IEEE80211_BAND_2GHZ) | BIT(IEEE80211_BAND_5GHZ),
};
-#define IWL_DEVICE_7000 \
- .ucode_api_max = IWL7260_UCODE_API_MAX, \
- .ucode_api_ok = IWL7260_UCODE_API_OK, \
- .ucode_api_min = IWL7260_UCODE_API_MIN, \
+#define IWL_DEVICE_7000_COMMON \
.device_family = IWL_DEVICE_FAMILY_7000, \
.max_inst_size = IWL60_RTC_INST_SIZE, \
.max_data_size = IWL60_RTC_DATA_SIZE, \
@@ -163,6 +166,24 @@ static const struct iwl_ht_params iwl7000_ht_params = {
.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, \
.dccm_offset = IWL7000_DCCM_OFFSET
+#define IWL_DEVICE_7000 \
+ IWL_DEVICE_7000_COMMON, \
+ .ucode_api_max = IWL7260_UCODE_API_MAX, \
+ .ucode_api_ok = IWL7260_UCODE_API_OK, \
+ .ucode_api_min = IWL7260_UCODE_API_MIN
+
+#define IWL_DEVICE_7005 \
+ IWL_DEVICE_7000_COMMON, \
+ .ucode_api_max = IWL7265_UCODE_API_MAX, \
+ .ucode_api_ok = IWL7265_UCODE_API_OK, \
+ .ucode_api_min = IWL7265_UCODE_API_MIN
+
+#define IWL_DEVICE_7005D \
+ IWL_DEVICE_7000_COMMON, \
+ .ucode_api_max = IWL7265D_UCODE_API_MAX, \
+ .ucode_api_ok = IWL7265D_UCODE_API_OK, \
+ .ucode_api_min = IWL7265D_UCODE_API_MIN
+
const struct iwl_cfg iwl7260_2ac_cfg = {
.name = "Intel(R) Dual Band Wireless AC 7260",
.fw_name_pre = IWL7260_FW_PRE,
@@ -266,7 +287,7 @@ static const struct iwl_ht_params iwl7265_ht_params = {
const struct iwl_cfg iwl3165_2ac_cfg = {
.name = "Intel(R) Dual Band Wireless AC 3165",
.fw_name_pre = IWL7265D_FW_PRE,
- IWL_DEVICE_7000,
+ IWL_DEVICE_7005D,
.ht_params = &iwl7000_ht_params,
.nvm_ver = IWL3165_NVM_VERSION,
.nvm_calib_ver = IWL3165_TX_POWER_VERSION,
@@ -277,7 +298,7 @@ const struct iwl_cfg iwl3165_2ac_cfg = {
const struct iwl_cfg iwl7265_2ac_cfg = {
.name = "Intel(R) Dual Band Wireless AC 7265",
.fw_name_pre = IWL7265_FW_PRE,
- IWL_DEVICE_7000,
+ IWL_DEVICE_7005,
.ht_params = &iwl7265_ht_params,
.nvm_ver = IWL7265_NVM_VERSION,
.nvm_calib_ver = IWL7265_TX_POWER_VERSION,
@@ -288,7 +309,7 @@ const struct iwl_cfg iwl7265_2ac_cfg = {
const struct iwl_cfg iwl7265_2n_cfg = {
.name = "Intel(R) Dual Band Wireless N 7265",
.fw_name_pre = IWL7265_FW_PRE,
- IWL_DEVICE_7000,
+ IWL_DEVICE_7005,
.ht_params = &iwl7265_ht_params,
.nvm_ver = IWL7265_NVM_VERSION,
.nvm_calib_ver = IWL7265_TX_POWER_VERSION,
@@ -299,7 +320,7 @@ const struct iwl_cfg iwl7265_2n_cfg = {
const struct iwl_cfg iwl7265_n_cfg = {
.name = "Intel(R) Wireless N 7265",
.fw_name_pre = IWL7265_FW_PRE,
- IWL_DEVICE_7000,
+ IWL_DEVICE_7005,
.ht_params = &iwl7265_ht_params,
.nvm_ver = IWL7265_NVM_VERSION,
.nvm_calib_ver = IWL7265_TX_POWER_VERSION,
@@ -310,7 +331,7 @@ const struct iwl_cfg iwl7265_n_cfg = {
const struct iwl_cfg iwl7265d_2ac_cfg = {
.name = "Intel(R) Dual Band Wireless AC 7265",
.fw_name_pre = IWL7265D_FW_PRE,
- IWL_DEVICE_7000,
+ IWL_DEVICE_7005D,
.ht_params = &iwl7265_ht_params,
.nvm_ver = IWL7265D_NVM_VERSION,
.nvm_calib_ver = IWL7265_TX_POWER_VERSION,
@@ -321,7 +342,7 @@ const struct iwl_cfg iwl7265d_2ac_cfg = {
const struct iwl_cfg iwl7265d_2n_cfg = {
.name = "Intel(R) Dual Band Wireless N 7265",
.fw_name_pre = IWL7265D_FW_PRE,
- IWL_DEVICE_7000,
+ IWL_DEVICE_7005D,
.ht_params = &iwl7265_ht_params,
.nvm_ver = IWL7265D_NVM_VERSION,
.nvm_calib_ver = IWL7265_TX_POWER_VERSION,
@@ -332,7 +353,7 @@ const struct iwl_cfg iwl7265d_2n_cfg = {
const struct iwl_cfg iwl7265d_n_cfg = {
.name = "Intel(R) Wireless N 7265",
.fw_name_pre = IWL7265D_FW_PRE,
- IWL_DEVICE_7000,
+ IWL_DEVICE_7005D,
.ht_params = &iwl7265_ht_params,
.nvm_ver = IWL7265D_NVM_VERSION,
.nvm_calib_ver = IWL7265_TX_POWER_VERSION,
@@ -342,5 +363,5 @@ const struct iwl_cfg iwl7265d_n_cfg = {
MODULE_FIRMWARE(IWL7260_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
-MODULE_FIRMWARE(IWL7265_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
-MODULE_FIRMWARE(IWL7265D_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
+MODULE_FIRMWARE(IWL7265_MODULE_FIRMWARE(IWL7265_UCODE_API_OK));
+MODULE_FIRMWARE(IWL7265D_MODULE_FIRMWARE(IWL7265D_UCODE_API_OK));
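The macro split above is what lets the 7265/7265D parts advertise firmware API 19 while 7260 stays pinned at 17: the shared fields live in IWL_DEVICE_7000_COMMON and each family appends its own API bounds. A self-contained sketch of composing designated-initializer macros this way, with hypothetical fields:

    #define DEV_COMMON              \
            .family = 7000,         \
            .max_inst_size = 1024

    #define DEV_OLD                 \
            DEV_COMMON,             \
            .api_max = 17           /* older family keeps the old ceiling   */

    #define DEV_NEW                 \
            DEV_COMMON,             \
            .api_max = 19           /* newer family may load newer firmware */

    struct cfg { int family, max_inst_size, api_max; };

    static const struct cfg cfg_old = { DEV_OLD };
    static const struct cfg cfg_new = { DEV_NEW };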
diff --git a/drivers/net/wireless/iwlwifi/iwl-8000.c b/drivers/net/wireless/iwlwifi/iwl-8000.c
index 0116e5a4c393..9bcc0bf937d8 100644
--- a/drivers/net/wireless/iwlwifi/iwl-8000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-8000.c
@@ -69,7 +69,7 @@
#include "iwl-agn-hw.h"
/* Highest firmware API version supported */
-#define IWL8000_UCODE_API_MAX 17
+#define IWL8000_UCODE_API_MAX 19
/* Oldest version we won't warn about */
#define IWL8000_UCODE_API_OK 13
diff --git a/drivers/net/wireless/iwlwifi/mvm/d3.c b/drivers/net/wireless/iwlwifi/mvm/d3.c
index 85ae902df7c0..29ae58ebf223 100644
--- a/drivers/net/wireless/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/iwlwifi/mvm/d3.c
@@ -309,9 +309,9 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
* to transmit packets to the AP, i.e. the PTK.
*/
if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) {
- key->hw_key_idx = 0;
mvm->ptk_ivlen = key->iv_len;
mvm->ptk_icvlen = key->icv_len;
+ ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 0);
} else {
/*
* firmware only supports TSC/RSC for a single key,
@@ -319,12 +319,11 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
* with new ones -- this relies on mac80211 doing
* list_add_tail().
*/
- key->hw_key_idx = 1;
mvm->gtk_ivlen = key->iv_len;
mvm->gtk_icvlen = key->icv_len;
+ ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 1);
}
- ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, true);
data->error = ret != 0;
out_unlock:
mutex_unlock(&mvm->mutex);
@@ -772,9 +771,6 @@ static int iwl_mvm_switch_to_d3(struct iwl_mvm *mvm)
*/
set_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status);
- /* We reprogram keys and shouldn't allocate new key indices */
- memset(mvm->fw_key_table, 0, sizeof(mvm->fw_key_table));
-
mvm->ptk_ivlen = 0;
mvm->ptk_icvlen = 0;
mvm->ptk_ivlen = 0;
diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
index 1fb684693040..e88afac51c5d 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
@@ -2941,6 +2941,7 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw,
{
struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
int ret;
+ u8 key_offset;
if (iwlwifi_mod_params.sw_crypto) {
IWL_DEBUG_MAC80211(mvm, "leave - hwcrypto disabled\n");
@@ -3006,10 +3007,14 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw,
break;
}
+	/* in HW restart, reuse the index; otherwise request a new one */
+ if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status))
+ key_offset = key->hw_key_idx;
+ else
+ key_offset = STA_KEY_IDX_INVALID;
+
IWL_DEBUG_MAC80211(mvm, "set hwcrypto key\n");
- ret = iwl_mvm_set_sta_key(mvm, vif, sta, key,
- test_bit(IWL_MVM_STATUS_IN_HW_RESTART,
- &mvm->status));
+ ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, key_offset);
if (ret) {
IWL_WARN(mvm, "set key failed\n");
/*
diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c
index 300a249486e4..2b976b110207 100644
--- a/drivers/net/wireless/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/iwlwifi/mvm/sta.c
@@ -1201,7 +1201,8 @@ static int iwl_mvm_set_fw_key_idx(struct iwl_mvm *mvm)
return max_offs;
}
-static u8 iwl_mvm_get_key_sta_id(struct ieee80211_vif *vif,
+static u8 iwl_mvm_get_key_sta_id(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
struct ieee80211_sta *sta)
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
@@ -1218,8 +1219,21 @@ static u8 iwl_mvm_get_key_sta_id(struct ieee80211_vif *vif,
* station ID, then use AP's station ID.
*/
if (vif->type == NL80211_IFTYPE_STATION &&
- mvmvif->ap_sta_id != IWL_MVM_STATION_COUNT)
- return mvmvif->ap_sta_id;
+ mvmvif->ap_sta_id != IWL_MVM_STATION_COUNT) {
+ u8 sta_id = mvmvif->ap_sta_id;
+
+ sta = rcu_dereference_check(mvm->fw_id_to_mac_id[sta_id],
+ lockdep_is_held(&mvm->mutex));
+ /*
+ * It is possible that the 'sta' parameter is NULL,
+ * for example when a GTK is removed - the sta_id will then
+ * be the AP ID, and no station was passed by mac80211.
+ */
+ if (IS_ERR_OR_NULL(sta))
+ return IWL_MVM_STATION_COUNT;
+
+ return sta_id;
+ }
return IWL_MVM_STATION_COUNT;
}
@@ -1227,7 +1241,8 @@ static u8 iwl_mvm_get_key_sta_id(struct ieee80211_vif *vif,
static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
struct iwl_mvm_sta *mvm_sta,
struct ieee80211_key_conf *keyconf, bool mcast,
- u32 tkip_iv32, u16 *tkip_p1k, u32 cmd_flags)
+ u32 tkip_iv32, u16 *tkip_p1k, u32 cmd_flags,
+ u8 key_offset)
{
struct iwl_mvm_add_sta_key_cmd cmd = {};
__le16 key_flags;
@@ -1269,7 +1284,7 @@ static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
if (mcast)
key_flags |= cpu_to_le16(STA_KEY_MULTICAST);
- cmd.key_offset = keyconf->hw_key_idx;
+ cmd.key_offset = key_offset;
cmd.key_flags = key_flags;
cmd.sta_id = sta_id;
@@ -1360,6 +1375,7 @@ static int __iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
struct ieee80211_key_conf *keyconf,
+ u8 key_offset,
bool mcast)
{
struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta);
@@ -1375,17 +1391,17 @@ static int __iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
ieee80211_get_key_rx_seq(keyconf, 0, &seq);
ieee80211_get_tkip_rx_p1k(keyconf, addr, seq.tkip.iv32, p1k);
ret = iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast,
- seq.tkip.iv32, p1k, 0);
+ seq.tkip.iv32, p1k, 0, key_offset);
break;
case WLAN_CIPHER_SUITE_CCMP:
case WLAN_CIPHER_SUITE_WEP40:
case WLAN_CIPHER_SUITE_WEP104:
ret = iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast,
- 0, NULL, 0);
+ 0, NULL, 0, key_offset);
break;
default:
ret = iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast,
- 0, NULL, 0);
+ 0, NULL, 0, key_offset);
}
return ret;
@@ -1433,7 +1449,7 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
struct ieee80211_key_conf *keyconf,
- bool have_key_offset)
+ u8 key_offset)
{
bool mcast = !(keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE);
u8 sta_id;
@@ -1443,7 +1459,7 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
lockdep_assert_held(&mvm->mutex);
/* Get the station id from the mvm local station table */
- sta_id = iwl_mvm_get_key_sta_id(vif, sta);
+ sta_id = iwl_mvm_get_key_sta_id(mvm, vif, sta);
if (sta_id == IWL_MVM_STATION_COUNT) {
IWL_ERR(mvm, "Failed to find station id\n");
return -EINVAL;
@@ -1470,18 +1486,25 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
if (WARN_ON_ONCE(iwl_mvm_sta_from_mac80211(sta)->vif != vif))
return -EINVAL;
- if (!have_key_offset) {
- /*
- * The D3 firmware hardcodes the PTK offset to 0, so we have to
- * configure it there. As a result, this workaround exists to
- * let the caller set the key offset (hw_key_idx), see d3.c.
- */
- keyconf->hw_key_idx = iwl_mvm_set_fw_key_idx(mvm);
- if (keyconf->hw_key_idx == STA_KEY_IDX_INVALID)
+ /* If the key_offset is not pre-assigned, we need to find a
+ * new offset to use. In normal cases, the offset is not
+ * pre-assigned, but during HW_RESTART we want to reuse the
+ * same indices, so we pass them when this function is called.
+ *
+	 * On D3 entry, we need to hardcode the indices (because the
+ * firmware hardcodes the PTK offset to 0). In this case, we
+ * need to make sure we don't overwrite the hw_key_idx in the
+ * keyconf structure, because otherwise we cannot configure
+ * the original ones back when resuming.
+ */
+ if (key_offset == STA_KEY_IDX_INVALID) {
+ key_offset = iwl_mvm_set_fw_key_idx(mvm);
+ if (key_offset == STA_KEY_IDX_INVALID)
return -ENOSPC;
+ keyconf->hw_key_idx = key_offset;
}
- ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf, mcast);
+ ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf, key_offset, mcast);
if (ret) {
__clear_bit(keyconf->hw_key_idx, mvm->fw_key_table);
goto end;
@@ -1495,7 +1518,8 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
*/
if (keyconf->cipher == WLAN_CIPHER_SUITE_WEP40 ||
keyconf->cipher == WLAN_CIPHER_SUITE_WEP104) {
- ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf, !mcast);
+ ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf,
+ key_offset, !mcast);
if (ret) {
__clear_bit(keyconf->hw_key_idx, mvm->fw_key_table);
__iwl_mvm_remove_sta_key(mvm, sta_id, keyconf, mcast);
@@ -1521,7 +1545,7 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm,
lockdep_assert_held(&mvm->mutex);
/* Get the station id from the mvm local station table */
- sta_id = iwl_mvm_get_key_sta_id(vif, sta);
+ sta_id = iwl_mvm_get_key_sta_id(mvm, vif, sta);
IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n",
keyconf->keyidx, sta_id);
@@ -1547,24 +1571,6 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm,
return 0;
}
- /*
- * It is possible that the 'sta' parameter is NULL, and thus
- * there is a need to retrieve the sta from the local station table,
- * for example when a GTK is removed (where the sta_id will then be
- * the AP ID, and no station was passed by mac80211.)
- */
- if (!sta) {
- sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id],
- lockdep_is_held(&mvm->mutex));
- if (!sta) {
- IWL_ERR(mvm, "Invalid station id\n");
- return -EINVAL;
- }
- }
-
- if (WARN_ON_ONCE(iwl_mvm_sta_from_mac80211(sta)->vif != vif))
- return -EINVAL;
-
ret = __iwl_mvm_remove_sta_key(mvm, sta_id, keyconf, mcast);
if (ret)
return ret;
@@ -1584,14 +1590,15 @@ void iwl_mvm_update_tkip_key(struct iwl_mvm *mvm,
u16 *phase1key)
{
struct iwl_mvm_sta *mvm_sta;
- u8 sta_id = iwl_mvm_get_key_sta_id(vif, sta);
+ u8 sta_id;
bool mcast = !(keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE);
- if (WARN_ON_ONCE(sta_id == IWL_MVM_STATION_COUNT))
- return;
-
rcu_read_lock();
+ sta_id = iwl_mvm_get_key_sta_id(mvm, vif, sta);
+ if (WARN_ON_ONCE(sta_id == IWL_MVM_STATION_COUNT))
+ goto unlock;
+
if (!sta) {
sta = rcu_dereference(mvm->fw_id_to_mac_id[sta_id]);
if (WARN_ON(IS_ERR_OR_NULL(sta))) {
@@ -1602,7 +1609,9 @@ void iwl_mvm_update_tkip_key(struct iwl_mvm *mvm,
mvm_sta = iwl_mvm_sta_from_mac80211(sta);
iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast,
- iv32, phase1key, CMD_ASYNC);
+ iv32, phase1key, CMD_ASYNC, keyconf->hw_key_idx);
+
+ unlock:
rcu_read_unlock();
}
diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.h b/drivers/net/wireless/iwlwifi/mvm/sta.h
index eedb215eba3f..0631cc0a6d3c 100644
--- a/drivers/net/wireless/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/iwlwifi/mvm/sta.h
@@ -365,8 +365,8 @@ int iwl_mvm_rm_sta_id(struct iwl_mvm *mvm,
int iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
- struct ieee80211_key_conf *key,
- bool have_key_offset);
+ struct ieee80211_key_conf *keyconf,
+ u8 key_offset);
int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
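Across d3.c, mac80211.c, sta.c and sta.h, the bool have_key_offset becomes an explicit u8 key_offset with STA_KEY_IDX_INVALID as the "allocate a fresh slot" sentinel; because the offset now travels as a parameter, keyconf->hw_key_idx survives D3 entry untouched and the original indices can be restored on resume. The sentinel convention, sketched with hypothetical helpers:

    #define KEY_IDX_INVALID 0xff    /* hypothetical sentinel value */

    static int set_key(u8 key_offset)
    {
            if (key_offset == KEY_IDX_INVALID) {
                    key_offset = alloc_key_idx();   /* assumed allocator */
                    if (key_offset == KEY_IDX_INVALID)
                            return -ENOSPC;         /* key table is full */
            }
            return program_key(key_offset);         /* valid either way  */
    }

    /* Callers choose the policy:
     *   set_key(KEY_IDX_INVALID);     normal path: allocate a new slot
     *   set_key(key->hw_key_idx);     HW restart / D3: reuse the old slot
     */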
diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c
index 644b58bc5226..639761fb2bfb 100644
--- a/drivers/net/wireless/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/iwlwifi/pcie/drv.c
@@ -423,14 +423,21 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
/* 8000 Series */
{IWL_PCI_DEVICE(0x24F3, 0x0010, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x1010, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x0130, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x1130, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x0132, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x1132, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x0110, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x01F0, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x0012, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x1012, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x1110, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x0050, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x0250, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x1050, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x0150, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x1150, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F4, 0x0030, iwl8260_2ac_cfg)},
- {IWL_PCI_DEVICE(0x24F4, 0x1130, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F4, 0x1030, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0xC010, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0xC110, iwl8260_2ac_cfg)},
@@ -438,18 +445,28 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
{IWL_PCI_DEVICE(0x24F3, 0xC050, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0xD050, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x8010, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x8110, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x9010, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x9110, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F4, 0x8030, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F4, 0x9030, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x8130, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x9130, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x8132, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x9132, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x8050, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x8150, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x9050, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x9150, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x0004, iwl8260_2n_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x0044, iwl8260_2n_cfg)},
{IWL_PCI_DEVICE(0x24F5, 0x0010, iwl4165_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F6, 0x0030, iwl4165_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x0810, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x0910, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x0850, iwl8260_2ac_cfg)},
{IWL_PCI_DEVICE(0x24F3, 0x0950, iwl8260_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x24F3, 0x0930, iwl8260_2ac_cfg)},
#endif /* CONFIG_IWLMVM */
{0}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
index 6e9418ed90c2..bbb789f8990b 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
@@ -2272,7 +2272,7 @@ void rtl8821ae_enable_interrupt(struct ieee80211_hw *hw)
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
- if (!rtlpci->int_clear)
+ if (rtlpci->int_clear)
rtl8821ae_clear_interrupt(hw);/*clear it here first*/
rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] & 0xFFFFFFFF);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
index 8ee141a55bc5..142bdff4ed60 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
@@ -448,7 +448,7 @@ MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n");
MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 1)\n");
MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)");
MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n");
-MODULE_PARM_DESC(int_clear, "Set to 1 to disable interrupt clear before set (default 0)\n");
+MODULE_PARM_DESC(int_clear, "Set to 0 to disable interrupt clear before set (default 1)\n");
static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index e481f3710bd3..1049c34e7d43 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -258,18 +258,18 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue,
struct netrx_pending_operations *npo)
{
struct xenvif_rx_meta *meta;
- struct xen_netif_rx_request *req;
+ struct xen_netif_rx_request req;
- req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
+ RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
meta = npo->meta + npo->meta_prod++;
meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
meta->gso_size = 0;
meta->size = 0;
- meta->id = req->id;
+ meta->id = req.id;
npo->copy_off = 0;
- npo->copy_gref = req->gref;
+ npo->copy_gref = req.gref;
return meta;
}
@@ -424,7 +424,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
struct xenvif *vif = netdev_priv(skb->dev);
int nr_frags = skb_shinfo(skb)->nr_frags;
int i;
- struct xen_netif_rx_request *req;
+ struct xen_netif_rx_request req;
struct xenvif_rx_meta *meta;
unsigned char *data;
int head = 1;
@@ -443,15 +443,15 @@ static int xenvif_gop_skb(struct sk_buff *skb,
/* Set up a GSO prefix descriptor, if necessary */
if ((1 << gso_type) & vif->gso_prefix_mask) {
- req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
+ RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
meta = npo->meta + npo->meta_prod++;
meta->gso_type = gso_type;
meta->gso_size = skb_shinfo(skb)->gso_size;
meta->size = 0;
- meta->id = req->id;
+ meta->id = req.id;
}
- req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
+ RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
meta = npo->meta + npo->meta_prod++;
if ((1 << gso_type) & vif->gso_mask) {
@@ -463,9 +463,9 @@ static int xenvif_gop_skb(struct sk_buff *skb,
}
meta->size = 0;
- meta->id = req->id;
+ meta->id = req.id;
npo->copy_off = 0;
- npo->copy_gref = req->gref;
+ npo->copy_gref = req.gref;
data = skb->data;
while (data < skb_tail_pointer(skb)) {
@@ -679,9 +679,7 @@ static void tx_add_credit(struct xenvif_queue *queue)
* Allow a burst big enough to transmit a jumbo packet of up to 128kB.
* Otherwise the interface can seize up due to insufficient credit.
*/
- max_burst = RING_GET_REQUEST(&queue->tx, queue->tx.req_cons)->size;
- max_burst = min(max_burst, 131072UL);
- max_burst = max(max_burst, queue->credit_bytes);
+ max_burst = max(131072UL, queue->credit_bytes);
/* Take care that adding a new chunk of credit doesn't wrap to zero. */
max_credit = queue->remaining_credit + queue->credit_bytes;
@@ -711,7 +709,7 @@ static void xenvif_tx_err(struct xenvif_queue *queue,
spin_unlock_irqrestore(&queue->response_lock, flags);
if (cons == end)
break;
- txp = RING_GET_REQUEST(&queue->tx, cons++);
+ RING_COPY_REQUEST(&queue->tx, cons++, txp);
} while (1);
queue->tx.req_cons = cons;
}
@@ -778,8 +776,7 @@ static int xenvif_count_requests(struct xenvif_queue *queue,
if (drop_err)
txp = &dropped_tx;
- memcpy(txp, RING_GET_REQUEST(&queue->tx, cons + slots),
- sizeof(*txp));
+ RING_COPY_REQUEST(&queue->tx, cons + slots, txp);
/* If the guest submitted a frame >= 64 KiB then
* first->size overflowed and following slots will
@@ -1112,8 +1109,7 @@ static int xenvif_get_extras(struct xenvif_queue *queue,
return -EBADR;
}
- memcpy(&extra, RING_GET_REQUEST(&queue->tx, cons),
- sizeof(extra));
+ RING_COPY_REQUEST(&queue->tx, cons, &extra);
if (unlikely(!extra.type ||
extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
queue->tx.req_cons = ++cons;
@@ -1322,7 +1318,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
idx = queue->tx.req_cons;
rmb(); /* Ensure that we see the request before we copy it. */
- memcpy(&txreq, RING_GET_REQUEST(&queue->tx, idx), sizeof(txreq));
+ RING_COPY_REQUEST(&queue->tx, idx, &txreq);
/* Credit-based scheduling. */
if (txreq.size > queue->remaining_credit &&
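Every RING_GET_REQUEST in netback above becomes a RING_COPY_REQUEST: the backend now validates a private copy of each request instead of re-reading the shared ring, closing the double-fetch races (the XSA-155 class) where a malicious frontend rewrites a request between the check and the use. The copy macro can be defined roughly as follows; treat this as a sketch of the ring.h approach rather than the exact header text:

    /* Copy the whole request out of the shared ring before validating it.
     * The volatile cast forces a real load from shared memory; every later
     * read then hits the local, frontend-immutable copy.
     */
    #define RING_COPY_REQUEST(_r, _idx, _req) do {                          \
            *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx);   \
    } while (0)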
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 219dc206fa5f..a5fe23952586 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -1,4 +1,5 @@
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
-nvme-y += pci.o scsi.o lightnvm.o
+lightnvm-$(CONFIG_NVM) := lightnvm.o
+nvme-y += pci.o scsi.o $(lightnvm-y)
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index e0b7b95813bc..15f2acb4d5cd 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -22,8 +22,6 @@
#include "nvme.h"
-#ifdef CONFIG_NVM
-
#include <linux/nvme.h>
#include <linux/bitops.h>
#include <linux/lightnvm.h>
@@ -93,7 +91,7 @@ struct nvme_nvm_l2ptbl {
__le16 cdw14[6];
};
-struct nvme_nvm_bbtbl {
+struct nvme_nvm_getbbtbl {
__u8 opcode;
__u8 flags;
__u16 command_id;
@@ -101,10 +99,23 @@ struct nvme_nvm_bbtbl {
__u64 rsvd[2];
__le64 prp1;
__le64 prp2;
- __le32 prp1_len;
- __le32 prp2_len;
- __le32 lbb;
- __u32 rsvd11[3];
+ __le64 spba;
+ __u32 rsvd4[4];
+};
+
+struct nvme_nvm_setbbtbl {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __le64 rsvd[2];
+ __le64 prp1;
+ __le64 prp2;
+ __le64 spba;
+ __le16 nlb;
+ __u8 value;
+ __u8 rsvd3;
+ __u32 rsvd4[3];
};
struct nvme_nvm_erase_blk {
@@ -129,8 +140,8 @@ struct nvme_nvm_command {
struct nvme_nvm_hb_rw hb_rw;
struct nvme_nvm_ph_rw ph_rw;
struct nvme_nvm_l2ptbl l2p;
- struct nvme_nvm_bbtbl get_bb;
- struct nvme_nvm_bbtbl set_bb;
+ struct nvme_nvm_getbbtbl get_bb;
+ struct nvme_nvm_setbbtbl set_bb;
struct nvme_nvm_erase_blk erase;
};
};
@@ -142,11 +153,13 @@ struct nvme_nvm_id_group {
__u8 num_ch;
__u8 num_lun;
__u8 num_pln;
+ __u8 rsvd1;
__le16 num_blk;
__le16 num_pg;
__le16 fpg_sz;
__le16 csecs;
__le16 sos;
+ __le16 rsvd2;
__le32 trdt;
__le32 trdm;
__le32 tprt;
@@ -154,8 +167,9 @@ struct nvme_nvm_id_group {
__le32 tbet;
__le32 tbem;
__le32 mpos;
+ __le32 mccap;
__le16 cpar;
- __u8 reserved[913];
+ __u8 reserved[906];
} __packed;
struct nvme_nvm_addr_format {
@@ -178,15 +192,28 @@ struct nvme_nvm_id {
__u8 ver_id;
__u8 vmnt;
__u8 cgrps;
- __u8 res[5];
+ __u8 res;
__le32 cap;
__le32 dom;
struct nvme_nvm_addr_format ppaf;
- __u8 ppat;
- __u8 resv[223];
+ __u8 resv[228];
struct nvme_nvm_id_group groups[4];
} __packed;
+struct nvme_nvm_bb_tbl {
+ __u8 tblid[4];
+ __le16 verid;
+ __le16 revid;
+ __le32 rvsd1;
+ __le32 tblks;
+ __le32 tfact;
+ __le32 tgrown;
+ __le32 tdresv;
+ __le32 thresv;
+ __le32 rsvd2[8];
+ __u8 blk[0];
+};
+
/*
* Check we didn't inadvertently grow the command struct
*/
@@ -195,12 +222,14 @@ static inline void _nvme_nvm_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_bbtbl) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 128);
BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 512);
}
static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
@@ -234,6 +263,7 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
dst->tbet = le32_to_cpu(src->tbet);
dst->tbem = le32_to_cpu(src->tbem);
dst->mpos = le32_to_cpu(src->mpos);
+ dst->mccap = le32_to_cpu(src->mccap);
dst->cpar = le16_to_cpu(src->cpar);
}
@@ -241,9 +271,10 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
return 0;
}
-static int nvme_nvm_identity(struct request_queue *q, struct nvm_id *nvm_id)
+static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
{
- struct nvme_ns *ns = q->queuedata;
+ struct nvme_ns *ns = nvmdev->q->queuedata;
+ struct nvme_dev *dev = ns->dev;
struct nvme_nvm_id *nvme_nvm_id;
struct nvme_nvm_command c = {};
int ret;
@@ -256,8 +287,8 @@ static int nvme_nvm_identity(struct request_queue *q, struct nvm_id *nvm_id)
if (!nvme_nvm_id)
return -ENOMEM;
- ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, nvme_nvm_id,
- sizeof(struct nvme_nvm_id));
+ ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+ nvme_nvm_id, sizeof(struct nvme_nvm_id));
if (ret) {
ret = -EIO;
goto out;
@@ -268,6 +299,8 @@ static int nvme_nvm_identity(struct request_queue *q, struct nvm_id *nvm_id)
nvm_id->cgrps = nvme_nvm_id->cgrps;
nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
+ memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf,
+ sizeof(struct nvme_nvm_addr_format));
ret = init_grps(nvm_id, nvme_nvm_id);
out:
@@ -275,13 +308,13 @@ out:
return ret;
}
-static int nvme_nvm_get_l2p_tbl(struct request_queue *q, u64 slba, u32 nlb,
+static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
nvm_l2p_update_fn *update_l2p, void *priv)
{
- struct nvme_ns *ns = q->queuedata;
+ struct nvme_ns *ns = nvmdev->q->queuedata;
struct nvme_dev *dev = ns->dev;
struct nvme_nvm_command c = {};
- u32 len = queue_max_hw_sectors(q) << 9;
+ u32 len = queue_max_hw_sectors(dev->admin_q) << 9;
u32 nlb_pr_rq = len / sizeof(u64);
u64 cmd_slba = slba;
void *entries;
@@ -299,8 +332,8 @@ static int nvme_nvm_get_l2p_tbl(struct request_queue *q, u64 slba, u32 nlb,
c.l2p.slba = cpu_to_le64(cmd_slba);
c.l2p.nlb = cpu_to_le32(cmd_nlb);
- ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c,
- entries, len);
+ ret = nvme_submit_sync_cmd(dev->admin_q,
+ (struct nvme_command *)&c, entries, len);
if (ret) {
dev_err(dev->dev, "L2P table transfer failed (%d)\n",
ret);
@@ -322,43 +355,84 @@ out:
return ret;
}
-static int nvme_nvm_get_bb_tbl(struct request_queue *q, int lunid,
- unsigned int nr_blocks,
- nvm_bb_update_fn *update_bbtbl, void *priv)
+static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
+ int nr_blocks, nvm_bb_update_fn *update_bbtbl,
+ void *priv)
{
+ struct request_queue *q = nvmdev->q;
struct nvme_ns *ns = q->queuedata;
struct nvme_dev *dev = ns->dev;
struct nvme_nvm_command c = {};
- void *bb_bitmap;
- u16 bb_bitmap_size;
+ struct nvme_nvm_bb_tbl *bb_tbl;
+ int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blocks;
int ret = 0;
c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
c.get_bb.nsid = cpu_to_le32(ns->ns_id);
- c.get_bb.lbb = cpu_to_le32(lunid);
- bb_bitmap_size = ((nr_blocks >> 15) + 1) * PAGE_SIZE;
- bb_bitmap = kmalloc(bb_bitmap_size, GFP_KERNEL);
- if (!bb_bitmap)
- return -ENOMEM;
+ c.get_bb.spba = cpu_to_le64(ppa.ppa);
- bitmap_zero(bb_bitmap, nr_blocks);
+ bb_tbl = kzalloc(tblsz, GFP_KERNEL);
+ if (!bb_tbl)
+ return -ENOMEM;
- ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, bb_bitmap,
- bb_bitmap_size);
+ ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+ bb_tbl, tblsz);
if (ret) {
dev_err(dev->dev, "get bad block table failed (%d)\n", ret);
ret = -EIO;
goto out;
}
- ret = update_bbtbl(lunid, bb_bitmap, nr_blocks, priv);
+ if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
+ bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
+ dev_err(dev->dev, "bbt format mismatch\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (le16_to_cpu(bb_tbl->verid) != 1) {
+ ret = -EINVAL;
+ dev_err(dev->dev, "bbt version not supported\n");
+ goto out;
+ }
+
+ if (le32_to_cpu(bb_tbl->tblks) != nr_blocks) {
+ ret = -EINVAL;
+		dev_err(dev->dev, "bbt unexpected blocks returned (%u!=%u)",
+ le32_to_cpu(bb_tbl->tblks), nr_blocks);
+ goto out;
+ }
+
+ ppa = dev_to_generic_addr(nvmdev, ppa);
+ ret = update_bbtbl(ppa, nr_blocks, bb_tbl->blk, priv);
if (ret) {
ret = -EINTR;
goto out;
}
out:
- kfree(bb_bitmap);
+ kfree(bb_tbl);
+ return ret;
+}
+
+static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct nvm_rq *rqd,
+ int type)
+{
+ struct nvme_ns *ns = nvmdev->q->queuedata;
+ struct nvme_dev *dev = ns->dev;
+ struct nvme_nvm_command c = {};
+ int ret = 0;
+
+ c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl;
+ c.set_bb.nsid = cpu_to_le32(ns->ns_id);
+ c.set_bb.spba = cpu_to_le64(rqd->ppa_addr.ppa);
+ c.set_bb.nlb = cpu_to_le16(rqd->nr_pages - 1);
+ c.set_bb.value = type;
+
+ ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+ NULL, 0);
+ if (ret)
+ dev_err(dev->dev, "set bad block table failed (%d)\n", ret);
return ret;
}
@@ -381,7 +455,7 @@ static void nvme_nvm_end_io(struct request *rq, int error)
struct nvm_rq *rqd = rq->end_io_data;
struct nvm_dev *dev = rqd->dev;
- if (dev->mt->end_io(rqd, error))
+ if (dev->mt && dev->mt->end_io(rqd, error))
pr_err("nvme: err status: %x result: %lx\n",
rq->errors, (unsigned long)rq->special);
@@ -389,8 +463,9 @@ static void nvme_nvm_end_io(struct request *rq, int error)
blk_mq_free_request(rq);
}
-static int nvme_nvm_submit_io(struct request_queue *q, struct nvm_rq *rqd)
+static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
+ struct request_queue *q = dev->q;
struct nvme_ns *ns = q->queuedata;
struct request *rq;
struct bio *bio = rqd->bio;
@@ -428,8 +503,9 @@ static int nvme_nvm_submit_io(struct request_queue *q, struct nvm_rq *rqd)
return 0;
}
-static int nvme_nvm_erase_block(struct request_queue *q, struct nvm_rq *rqd)
+static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd)
{
+ struct request_queue *q = dev->q;
struct nvme_ns *ns = q->queuedata;
struct nvme_nvm_command c = {};
@@ -441,9 +517,9 @@ static int nvme_nvm_erase_block(struct request_queue *q, struct nvm_rq *rqd)
return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0);
}
-static void *nvme_nvm_create_dma_pool(struct request_queue *q, char *name)
+static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
{
- struct nvme_ns *ns = q->queuedata;
+ struct nvme_ns *ns = nvmdev->q->queuedata;
struct nvme_dev *dev = ns->dev;
return dma_pool_create(name, dev->dev, PAGE_SIZE, PAGE_SIZE, 0);
@@ -456,7 +532,7 @@ static void nvme_nvm_destroy_dma_pool(void *pool)
dma_pool_destroy(dma_pool);
}
-static void *nvme_nvm_dev_dma_alloc(struct request_queue *q, void *pool,
+static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
gfp_t mem_flags, dma_addr_t *dma_handler)
{
return dma_pool_alloc(pool, mem_flags, dma_handler);
@@ -474,6 +550,7 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
.get_l2p_tbl = nvme_nvm_get_l2p_tbl,
.get_bb_tbl = nvme_nvm_get_bb_tbl,
+ .set_bb_tbl = nvme_nvm_set_bb_tbl,
.submit_io = nvme_nvm_submit_io,
.erase_block = nvme_nvm_erase_block,
@@ -496,31 +573,27 @@ void nvme_nvm_unregister(struct request_queue *q, char *disk_name)
nvm_unregister(disk_name);
}
+/* Move to a shared place when these IDs are used in multiple places. */
+#define PCI_VENDOR_ID_CNEX 0x1d1d
+#define PCI_DEVICE_ID_CNEX_WL 0x2807
+#define PCI_DEVICE_ID_CNEX_QEMU 0x1f1f
+
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
struct nvme_dev *dev = ns->dev;
struct pci_dev *pdev = to_pci_dev(dev->dev);
/* QEMU NVMe simulator - PCI ID + Vendor specific bit */
- if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == 0x5845 &&
+ if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
+ pdev->device == PCI_DEVICE_ID_CNEX_QEMU &&
id->vs[0] == 0x1)
return 1;
/* CNEX Labs - PCI ID + Vendor specific bit */
- if (pdev->vendor == 0x1d1d && pdev->device == 0x2807 &&
+ if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
+ pdev->device == PCI_DEVICE_ID_CNEX_WL &&
id->vs[0] == 0x1)
return 1;
return 0;
}
-#else
-int nvme_nvm_register(struct request_queue *q, char *disk_name)
-{
- return 0;
-}
-void nvme_nvm_unregister(struct request_queue *q, char *disk_name) {};
-int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
-{
- return 0;
-}
-#endif /* CONFIG_NVM */
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index fdb4e5bad9ac..044253dca30a 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -136,8 +136,22 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
int nvme_sg_get_version_num(int __user *ip);
+#ifdef CONFIG_NVM
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
int nvme_nvm_register(struct request_queue *q, char *disk_name);
void nvme_nvm_unregister(struct request_queue *q, char *disk_name);
+#else
+static inline int nvme_nvm_register(struct request_queue *q, char *disk_name)
+{
+ return 0;
+}
+
+static inline void nvme_nvm_unregister(struct request_queue *q, char *disk_name) {};
+
+static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
+{
+ return 0;
+}
+#endif /* CONFIG_NVM */
#endif /* _NVME_H */
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8187df204695..0c67b57be83c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -896,19 +896,28 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
goto retry_cmd;
}
if (blk_integrity_rq(req)) {
- if (blk_rq_count_integrity_sg(req->q, req->bio) != 1)
+ if (blk_rq_count_integrity_sg(req->q, req->bio) != 1) {
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents,
+ dma_dir);
goto error_cmd;
+ }
sg_init_table(iod->meta_sg, 1);
if (blk_rq_map_integrity_sg(
- req->q, req->bio, iod->meta_sg) != 1)
+ req->q, req->bio, iod->meta_sg) != 1) {
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents,
+ dma_dir);
goto error_cmd;
+ }
if (rq_data_dir(req))
nvme_dif_remap(req, nvme_dif_prep);
- if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir))
+ if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir)) {
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents,
+ dma_dir);
goto error_cmd;
+ }
}
}
@@ -968,7 +977,8 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
return;
- writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
+ if (likely(nvmeq->cq_vector >= 0))
+ writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
nvmeq->cq_head = head;
nvmeq->cq_phase = phase;
@@ -1727,9 +1737,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
u32 aqa;
u64 cap = lo_hi_readq(&dev->bar->cap);
struct nvme_queue *nvmeq;
- unsigned page_shift = PAGE_SHIFT;
+ /*
+ * default to a 4K page size, with the intention to update this
+	 * path in the future to accommodate architectures with differing
+ * kernel and IO page sizes.
+ */
+ unsigned page_shift = 12;
unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
- unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;
if (page_shift < dev_page_min) {
dev_err(dev->dev,
@@ -1738,13 +1752,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
1 << page_shift);
return -ENODEV;
}
- if (page_shift > dev_page_max) {
- dev_info(dev->dev,
- "Device maximum page size (%u) smaller than "
- "host (%u); enabling work-around\n",
- 1 << dev_page_max, 1 << page_shift);
- page_shift = dev_page_max;
- }
dev->subsystem = readl(&dev->bar->vs) >= NVME_VS(1, 1) ?
NVME_CAP_NSSRC(cap) : 0;
@@ -2268,7 +2275,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
if (dev->max_hw_sectors) {
blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
blk_queue_max_segments(ns->queue,
- ((dev->max_hw_sectors << 9) / dev->page_size) + 1);
+ (dev->max_hw_sectors / (dev->page_size >> 9)) + 1);
}
if (dev->stripe_size)
blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9);
@@ -2533,8 +2540,17 @@ static void nvme_ns_remove(struct nvme_ns *ns)
{
bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue);
- if (kill)
+ if (kill) {
blk_set_queue_dying(ns->queue);
+
+ /*
+ * The controller was shutdown first if we got here through
+ * device removal. The shutdown may requeue outstanding
+ * requests. These need to be aborted immediately so
+	 * del_gendisk doesn't block indefinitely waiting for their completion.
+ */
+ blk_mq_abort_requeue_list(ns->queue);
+ }
if (ns->disk->flags & GENHD_FL_UP)
del_gendisk(ns->disk);
if (kill || !blk_queue_dying(ns->queue)) {
@@ -2701,6 +2717,18 @@ static int nvme_dev_map(struct nvme_dev *dev)
dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
dev->dbs = ((void __iomem *)dev->bar) + 4096;
+
+ /*
+ * Temporary fix for the Apple controller found in the MacBook8,1 and
+ * some MacBook7,1 to avoid controller resets and data loss.
+ */
+ if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) {
+ dev->q_depth = 2;
+ dev_warn(dev->dev, "detected Apple NVMe controller, set "
+ "queue depth=%u to work around controller resets\n",
+ dev->q_depth);
+ }
+
if (readl(&dev->bar->vs) >= NVME_VS(1, 2))
dev->cmb = nvme_map_cmb(dev);
@@ -2787,6 +2815,10 @@ static void nvme_del_queue_end(struct nvme_queue *nvmeq)
{
struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx;
nvme_put_dq(dq);
+
+ spin_lock_irq(&nvmeq->q_lock);
+ nvme_process_cq(nvmeq);
+ spin_unlock_irq(&nvmeq->q_lock);
}
static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode,
@@ -2954,6 +2986,15 @@ static void nvme_dev_remove(struct nvme_dev *dev)
{
struct nvme_ns *ns, *next;
+ if (nvme_io_incapable(dev)) {
+ /*
+ * If the device is not capable of IO (surprise hot-removal,
+ * for example), we need to quiesce prior to deleting the
+ * namespaces. This will end outstanding requests and prevent
+ * attempts to sync dirty data.
+ */
+ nvme_dev_shutdown(dev);
+ }
list_for_each_entry_safe(ns, next, &dev->namespaces, list)
nvme_ns_remove(ns);
}
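The nvme_queue_rq hunks above add dma_unmap_sg() to three early-exit paths that previously leaked the data-sg mapping whenever integrity setup failed after the map succeeded. A goto-unwind ladder keeps the map/unmap pairing in one place; the integrity helpers here are hypothetical stand-ins:

    static int example_map(struct device *dev, struct scatterlist *sg,
                           int nents, enum dma_data_direction dir)
    {
            if (!dma_map_sg(dev, sg, nents, dir))
                    return -ENOMEM;

            if (count_integrity_sg() != 1)  /* hypothetical check        */
                    goto unmap;             /* would leak sg without this */
            if (map_integrity_sg() != 1)    /* hypothetical mapping step  */
                    goto unmap;

            return 0;

    unmap:
            dma_unmap_sg(dev, sg, nents, dir);      /* undo on every failure */
            return -EIO;
    }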
diff --git a/drivers/of/address.c b/drivers/of/address.c
index cd53fe4a0c86..9582c5703b3c 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -485,9 +485,10 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus,
int rone;
u64 offset = OF_BAD_ADDR;
- /* Normally, an absence of a "ranges" property means we are
+ /*
+ * Normally, an absence of a "ranges" property means we are
* crossing a non-translatable boundary, and thus the addresses
- * below the current not cannot be converted to CPU physical ones.
+ * below the current cannot be converted to CPU physical ones.
* Unfortunately, while this is very clear in the spec, it's not
* what Apple understood, and they do have things like /uni-n or
* /ht nodes with no "ranges" property and a lot of perfectly
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index d2430298a309..655f79db7899 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/initrd.h>
#include <linux/memblock.h>
+#include <linux/mutex.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_reserved_mem.h>
@@ -436,6 +437,8 @@ static void *kernel_tree_alloc(u64 size, u64 align)
return kzalloc(size, GFP_KERNEL);
}
+static DEFINE_MUTEX(of_fdt_unflatten_mutex);
+
/**
* of_fdt_unflatten_tree - create tree of device_nodes from flat blob
*
@@ -447,7 +450,9 @@ static void *kernel_tree_alloc(u64 size, u64 align)
void of_fdt_unflatten_tree(const unsigned long *blob,
struct device_node **mynodes)
{
+ mutex_lock(&of_fdt_unflatten_mutex);
__unflatten_device_tree(blob, mynodes, &kernel_tree_alloc);
+ mutex_unlock(&of_fdt_unflatten_mutex);
}
EXPORT_SYMBOL_GPL(of_fdt_unflatten_tree);
@@ -1041,7 +1046,7 @@ void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
phys_addr_t size, bool nomap)
{
- pr_err("Reserved memory not supported, ignoring range 0x%pa - 0x%pa%s\n",
+ pr_err("Reserved memory not supported, ignoring range %pa - %pa%s\n",
&base, &size, nomap ? " (nomap)" : "");
return -ENOSYS;
}
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 902b89be7217..4fa916dffc91 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(irq_of_parse_and_map);
* Returns a pointer to the interrupt parent node, or NULL if the interrupt
* parent could not be determined.
*/
-static struct device_node *of_irq_find_parent(struct device_node *child)
+struct device_node *of_irq_find_parent(struct device_node *child)
{
struct device_node *p;
const __be32 *parp;
@@ -77,6 +77,7 @@ static struct device_node *of_irq_find_parent(struct device_node *child)
return p;
}
+EXPORT_SYMBOL_GPL(of_irq_find_parent);
/**
* of_irq_parse_raw - Low level interrupt tree parsing
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index be77e75c587d..1a3556a9e9ea 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -206,7 +206,13 @@ static int __init __rmem_cmp(const void *a, const void *b)
{
const struct reserved_mem *ra = a, *rb = b;
- return ra->base - rb->base;
+ if (ra->base < rb->base)
+ return -1;
+
+ if (ra->base > rb->base)
+ return 1;
+
+ return 0;
}
static void __init __rmem_check_for_overlap(void)
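The __rmem_cmp change above fixes a truncation bug: returning ra->base - rb->base narrows a 64-bit phys_addr_t difference into the comparator's int result, so bases that differ only above bit 31 compare as equal or with the wrong sign, and the reserved-memory sort misorders exactly on large-address systems. A worked example of the truncation, with made-up addresses:

    u64 a = 0x100000000ULL;         /* 4 GiB */
    u64 b = 0x000000000ULL;         /* 0     */

    int bad  = a - b;               /* truncates to 0: regions look "equal" */
    int good = (a < b) ? -1 : (a > b) ? 1 : 0;      /* correct: returns 1   */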
diff --git a/drivers/parisc/iommu-helpers.h b/drivers/parisc/iommu-helpers.h
index 761e77bfce5d..e56f1569f6c3 100644
--- a/drivers/parisc/iommu-helpers.h
+++ b/drivers/parisc/iommu-helpers.h
@@ -104,7 +104,11 @@ iommu_coalesce_chunks(struct ioc *ioc, struct device *dev,
struct scatterlist *contig_sg; /* contig chunk head */
unsigned long dma_offset, dma_len; /* start/len of DMA stream */
unsigned int n_mappings = 0;
- unsigned int max_seg_size = dma_get_max_seg_size(dev);
+ unsigned int max_seg_size = min(dma_get_max_seg_size(dev),
+ (unsigned)DMA_CHUNK_SIZE);
+ unsigned int max_seg_boundary = dma_get_seg_boundary(dev) + 1;
+ if (max_seg_boundary) /* check if the addition above didn't overflow */
+ max_seg_size = min(max_seg_size, max_seg_boundary);
while (nents > 0) {
@@ -138,14 +142,11 @@ iommu_coalesce_chunks(struct ioc *ioc, struct device *dev,
/*
** First make sure current dma stream won't
- ** exceed DMA_CHUNK_SIZE if we coalesce the
+ ** exceed max_seg_size if we coalesce the
** next entry.
*/
- if(unlikely(ALIGN(dma_len + dma_offset + startsg->length,
- IOVP_SIZE) > DMA_CHUNK_SIZE))
- break;
-
- if (startsg->length + dma_len > max_seg_size)
+ if (unlikely(ALIGN(dma_len + dma_offset + startsg->length, IOVP_SIZE) >
+ max_seg_size))
break;
/*
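
The parisc change above folds DMA_CHUNK_SIZE and the device's segment boundary into a single max_seg_size limit, guarding against dma_get_seg_boundary() returning an all-ones mask, where adding 1 wraps to 0. A standalone model of that guard (values are hypothetical):

#include <stdio.h>

int main(void)
{
	unsigned int max_seg_size = 65536;
	unsigned int boundary_mask = ~0u;		   /* "no boundary" */
	unsigned int max_seg_boundary = boundary_mask + 1; /* wraps to 0 */

	if (max_seg_boundary)	/* only clamp when the addition didn't wrap */
		max_seg_size = max_seg_size < max_seg_boundary ?
			       max_seg_size : max_seg_boundary;

	printf("max_seg_size = %u\n", max_seg_size);	/* still 65536 */
	return 0;
}
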
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index f131ba947dc6..c0ad9aaa16a7 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -5,6 +5,7 @@ config PCI_DRA7XX
bool "TI DRA7xx PCIe controller"
select PCIE_DW
depends on OF && HAS_IOMEM && TI_PIPE3
+ depends on BROKEN
help
Enables support for the PCIe controller in the DRA7xx SoC. There
are two instances of PCIe controller in DRA7xx. This controller can
diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c
index e5dda38bdde5..99da549d5d06 100644
--- a/drivers/pci/host/pcie-altera.c
+++ b/drivers/pci/host/pcie-altera.c
@@ -55,8 +55,10 @@
#define TLP_CFG_DW2(bus, devfn, offset) \
(((bus) << 24) | ((devfn) << 16) | (offset))
#define TLP_REQ_ID(bus, devfn) (((bus) << 8) | (devfn))
+#define TLP_COMP_STATUS(s) (((s) >> 12) & 7)
#define TLP_HDR_SIZE 3
#define TLP_LOOP 500
+#define RP_DEVFN 0
#define INTX_NUM 4
@@ -166,34 +168,41 @@ static bool altera_pcie_valid_config(struct altera_pcie *pcie,
static int tlp_read_packet(struct altera_pcie *pcie, u32 *value)
{
- u8 loop;
+ int i;
bool sop = 0;
u32 ctrl;
u32 reg0, reg1;
+ u32 comp_status = 1;
/*
* Minimum 2 loops to read TLP headers and 1 loop to read data
* payload.
*/
- for (loop = 0; loop < TLP_LOOP; loop++) {
+ for (i = 0; i < TLP_LOOP; i++) {
ctrl = cra_readl(pcie, RP_RXCPL_STATUS);
if ((ctrl & RP_RXCPL_SOP) || (ctrl & RP_RXCPL_EOP) || sop) {
reg0 = cra_readl(pcie, RP_RXCPL_REG0);
reg1 = cra_readl(pcie, RP_RXCPL_REG1);
- if (ctrl & RP_RXCPL_SOP)
+ if (ctrl & RP_RXCPL_SOP) {
sop = true;
+ comp_status = TLP_COMP_STATUS(reg1);
+ }
if (ctrl & RP_RXCPL_EOP) {
+ if (comp_status)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
if (value)
*value = reg0;
+
return PCIBIOS_SUCCESSFUL;
}
}
udelay(5);
}
- return -ENOENT;
+ return PCIBIOS_DEVICE_NOT_FOUND;
}
static void tlp_write_packet(struct altera_pcie *pcie, u32 *headers,
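
The tlp_read_packet() change above captures the completion status from the second header dword at start-of-packet and fails the access when it is non-zero, instead of handing back whatever payload arrived. A standalone sketch of the decode, with the field placement taken from the TLP_COMP_STATUS() macro itself and hypothetical header values:

#include <stdint.h>
#include <stdio.h>

#define TLP_COMP_STATUS(s)	(((s) >> 12) & 7)

int main(void)
{
	uint32_t good_hdr = 0x00000000;	/* status 0: successful completion */
	uint32_t bad_hdr = 0x00001000;	/* non-zero status: report an error */

	printf("good=%u bad=%u\n",
	       TLP_COMP_STATUS(good_hdr), TLP_COMP_STATUS(bad_hdr));
	return 0;
}
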
@@ -233,7 +242,7 @@ static int tlp_cfg_dword_read(struct altera_pcie *pcie, u8 bus, u32 devfn,
else
headers[0] = TLP_CFG_DW0(TLP_FMTTYPE_CFGRD1);
- headers[1] = TLP_CFG_DW1(TLP_REQ_ID(pcie->root_bus_nr, devfn),
+ headers[1] = TLP_CFG_DW1(TLP_REQ_ID(pcie->root_bus_nr, RP_DEVFN),
TLP_READ_TAG, byte_en);
headers[2] = TLP_CFG_DW2(bus, devfn, where);
@@ -253,7 +262,7 @@ static int tlp_cfg_dword_write(struct altera_pcie *pcie, u8 bus, u32 devfn,
else
headers[0] = TLP_CFG_DW0(TLP_FMTTYPE_CFGWR1);
- headers[1] = TLP_CFG_DW1(TLP_REQ_ID(pcie->root_bus_nr, devfn),
+ headers[1] = TLP_CFG_DW1(TLP_REQ_ID(pcie->root_bus_nr, RP_DEVFN),
TLP_WRITE_TAG, byte_en);
headers[2] = TLP_CFG_DW2(bus, devfn, where);
@@ -458,7 +467,7 @@ static int altera_pcie_init_irq_domain(struct altera_pcie *pcie)
struct device_node *node = dev->of_node;
/* Setup INTx */
- pcie->irq_domain = irq_domain_add_linear(node, INTX_NUM,
+ pcie->irq_domain = irq_domain_add_linear(node, INTX_NUM + 1,
&intx_domain_ops, pcie);
if (!pcie->irq_domain) {
dev_err(dev, "Failed to get a INTx IRQ domain\n");
diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c
index 540f077c37ea..02a7452bdf23 100644
--- a/drivers/pci/host/pcie-designware.c
+++ b/drivers/pci/host/pcie-designware.c
@@ -440,7 +440,6 @@ int dw_pcie_host_init(struct pcie_port *pp)
ret, pp->io);
continue;
}
- pp->io_base = pp->io->start;
break;
case IORESOURCE_MEM:
pp->mem = win->res;
diff --git a/drivers/pci/host/pcie-hisi.c b/drivers/pci/host/pcie-hisi.c
index 35457ecd8e70..77f7c669a1b9 100644
--- a/drivers/pci/host/pcie-hisi.c
+++ b/drivers/pci/host/pcie-hisi.c
@@ -61,7 +61,9 @@ static int hisi_pcie_cfg_read(struct pcie_port *pp, int where, int size,
*val = *(u8 __force *) walker;
else if (size == 2)
*val = *(u16 __force *) walker;
- else if (size != 4)
+ else if (size == 4)
+ *val = reg_val;
+ else
return PCIBIOS_BAD_REGISTER_NUMBER;
return PCIBIOS_SUCCESSFUL;
@@ -111,7 +113,7 @@ static struct pcie_host_ops hisi_pcie_host_ops = {
.link_up = hisi_pcie_link_up,
};
-static int __init hisi_add_pcie_port(struct pcie_port *pp,
+static int hisi_add_pcie_port(struct pcie_port *pp,
struct platform_device *pdev)
{
int ret;
@@ -139,7 +141,7 @@ static int __init hisi_add_pcie_port(struct pcie_port *pp,
return 0;
}
-static int __init hisi_pcie_probe(struct platform_device *pdev)
+static int hisi_pcie_probe(struct platform_device *pdev)
{
struct hisi_pcie *hisi_pcie;
struct pcie_port *pp;
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 53e463244bb7..7a0df3fdbfae 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -54,7 +54,7 @@ static int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
struct irq_domain *domain;
domain = pci_msi_get_domain(dev);
- if (domain)
+ if (domain && irq_domain_is_hierarchy(domain))
return pci_msi_domain_alloc_irqs(domain, dev, nvec, type);
return arch_setup_msi_irqs(dev, nvec, type);
@@ -65,7 +65,7 @@ static void pci_msi_teardown_msi_irqs(struct pci_dev *dev)
struct irq_domain *domain;
domain = pci_msi_get_domain(dev);
- if (domain)
+ if (domain && irq_domain_is_hierarchy(domain))
pci_msi_domain_free_irqs(domain, dev);
else
arch_teardown_msi_irqs(dev);
@@ -257,6 +257,7 @@ void pci_msi_mask_irq(struct irq_data *data)
{
msi_set_mask_bit(data, 1);
}
+EXPORT_SYMBOL_GPL(pci_msi_mask_irq);
/**
* pci_msi_unmask_irq - Generic irq chip callback to unmask PCI/MSI interrupts
@@ -266,6 +267,7 @@ void pci_msi_unmask_irq(struct irq_data *data)
{
msi_set_mask_bit(data, 0);
}
+EXPORT_SYMBOL_GPL(pci_msi_unmask_irq);
void default_restore_msi_irqs(struct pci_dev *dev)
{
@@ -1126,6 +1128,7 @@ struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
{
return to_pci_dev(desc->dev);
}
+EXPORT_SYMBOL(msi_desc_to_pci_dev);
void *msi_desc_to_pci_sysdata(struct msi_desc *desc)
{
@@ -1285,6 +1288,7 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
domain->bus_token = DOMAIN_BUS_PCI_MSI;
return domain;
}
+EXPORT_SYMBOL_GPL(pci_msi_create_irq_domain);
/**
* pci_msi_domain_alloc_irqs - Allocate interrupts for @dev in @domain
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index a32ba753e413..d3f32d6417ef 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -9,7 +9,9 @@
#include <linux/delay.h>
#include <linux/init.h>
+#include <linux/irqdomain.h>
#include <linux/pci.h>
+#include <linux/msi.h>
#include <linux/pci_hotplug.h>
#include <linux/module.h>
#include <linux/pci-aspm.h>
@@ -689,6 +691,46 @@ static struct acpi_bus_type acpi_pci_bus = {
.cleanup = pci_acpi_cleanup,
};
+
+static struct fwnode_handle *(*pci_msi_get_fwnode_cb)(struct device *dev);
+
+/**
+ * pci_msi_register_fwnode_provider - Register callback to retrieve fwnode
+ * @fn: Callback matching a device to a fwnode that identifies a PCI
+ * MSI domain.
+ *
+ * This should be called by the irqchip driver, which is the parent of
+ * the MSI domain, to provide a callback interface for querying the fwnode.
+ */
+void
+pci_msi_register_fwnode_provider(struct fwnode_handle *(*fn)(struct device *))
+{
+ pci_msi_get_fwnode_cb = fn;
+}
+
+/**
+ * pci_host_bridge_acpi_msi_domain - Retrieve MSI domain of a PCI host bridge
+ * @bus: The PCI host bridge bus.
+ *
+ * This function uses the callback function registered by
+ * pci_msi_register_fwnode_provider() to retrieve the irq_domain with
+ * type DOMAIN_BUS_PCI_MSI of the specified host bridge bus.
+ * This returns NULL on error or when the domain is not found.
+ */
+struct irq_domain *pci_host_bridge_acpi_msi_domain(struct pci_bus *bus)
+{
+ struct fwnode_handle *fwnode;
+
+ if (!pci_msi_get_fwnode_cb)
+ return NULL;
+
+ fwnode = pci_msi_get_fwnode_cb(&bus->dev);
+ if (!fwnode)
+ return NULL;
+
+ return irq_find_matching_fwnode(fwnode, DOMAIN_BUS_PCI_MSI);
+}
+
static int __init acpi_pci_init(void)
{
int ret;
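
As a usage note for the two new helpers above: the intended caller of pci_msi_register_fwnode_provider() is the parent irqchip driver. A hedged sketch of such a registration (the demo_* names are hypothetical, not from this patch):

static struct fwnode_handle *demo_msi_fwnode;	/* set up by the irqchip */

static struct fwnode_handle *demo_get_msi_fwnode(struct device *dev)
{
	/* one MSI domain serves every host bridge in this sketch */
	return demo_msi_fwnode;
}

static int __init demo_irqchip_init(void)
{
	pci_msi_register_fwnode_provider(&demo_get_msi_fwnode);
	return 0;
}
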
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 4446fcb5effd..d7ffd66814bb 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -1146,9 +1146,21 @@ static int pci_pm_runtime_suspend(struct device *dev)
pci_dev->state_saved = false;
pci_dev->no_d3cold = false;
error = pm->runtime_suspend(dev);
- suspend_report_result(pm->runtime_suspend, error);
- if (error)
+ if (error) {
+ /*
+ * -EBUSY and -EAGAIN are used to request the runtime PM core
+ * to schedule a new suspend, so log the event only with debug
+ * log level.
+ */
+ if (error == -EBUSY || error == -EAGAIN)
+ dev_dbg(dev, "can't suspend now (%pf returned %d)\n",
+ pm->runtime_suspend, error);
+ else
+ dev_err(dev, "can't suspend (%pf returned %d)\n",
+ pm->runtime_suspend, error);
+
return error;
+ }
if (!pci_dev->d3cold_allowed)
pci_dev->no_d3cold = true;
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 92618686604c..eead54cd01b2 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -216,7 +216,10 @@ static ssize_t numa_node_store(struct device *dev,
if (ret)
return ret;
- if (node >= MAX_NUMNODES || !node_online(node))
+ if ((node < 0 && node != NUMA_NO_NODE) || node >= MAX_NUMNODES)
+ return -EINVAL;
+
+ if (node != NUMA_NO_NODE && !node_online(node))
return -EINVAL;
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
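
The numa_node_store() rework above accepts -1 (NUMA_NO_NODE) as the only valid negative value and additionally requires any real node id to be online. A standalone model of the same checks, with node_online() stubbed for the demo:

#include <stdio.h>

#define NUMA_NO_NODE	(-1)
#define MAX_NUMNODES	64

static int node_online(int node)
{
	return node == 0;	/* pretend only node 0 is online */
}

static int validate(int node)
{
	if ((node < 0 && node != NUMA_NO_NODE) || node >= MAX_NUMNODES)
		return -1;
	if (node != NUMA_NO_NODE && !node_online(node))
		return -1;
	return 0;
}

int main(void)
{
	int tests[] = { -1, -2, 0, 1, 64 };
	unsigned int i;

	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
		printf("%3d -> %s\n", tests[i],
		       validate(tests[i]) ? "rejected" : "accepted");
	return 0;
}
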
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index fd2f03fa53f3..d390fc1475ec 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -337,6 +337,4 @@ static inline int pci_dev_specific_reset(struct pci_dev *dev, int probe)
}
#endif
-struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
-
#endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index e735c728e3b3..553a029e37f1 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -672,6 +672,8 @@ static struct irq_domain *pci_host_bridge_msi_domain(struct pci_bus *bus)
* should be called from here.
*/
d = pci_host_bridge_of_msi_domain(bus);
+ if (!d)
+ d = pci_host_bridge_acpi_msi_domain(bus);
return d;
}
@@ -1685,8 +1687,8 @@ static void pci_dma_configure(struct pci_dev *dev)
{
struct device *bridge = pci_get_host_bridge_device(dev);
- if (IS_ENABLED(CONFIG_OF) && dev->dev.of_node) {
- if (bridge->parent)
+ if (IS_ENABLED(CONFIG_OF) &&
+ bridge->parent && bridge->parent->of_node) {
of_dma_configure(&dev->dev, bridge->parent->of_node);
} else if (has_acpi_companion(bridge)) {
struct acpi_device *adev = to_acpi_device_node(bridge->fwnode);
diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index 7eb5859dd035..03cb3ea2d2c0 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -233,6 +233,7 @@ config PHY_SUN9I_USB
tristate "Allwinner sun9i SoC USB PHY driver"
depends on ARCH_SUNXI && HAS_IOMEM && OF
depends on RESET_CONTROLLER
+ depends on USB_COMMON
select GENERIC_PHY
help
Enable this to support the transceiver that is part of Allwinner
diff --git a/drivers/phy/phy-bcm-cygnus-pcie.c b/drivers/phy/phy-bcm-cygnus-pcie.c
index 7ad72b7d2b98..082c03f6438f 100644
--- a/drivers/phy/phy-bcm-cygnus-pcie.c
+++ b/drivers/phy/phy-bcm-cygnus-pcie.c
@@ -128,6 +128,7 @@ static int cygnus_pcie_phy_probe(struct platform_device *pdev)
struct phy_provider *provider;
struct resource *res;
unsigned cnt = 0;
+ int ret;
if (of_get_child_count(node) == 0) {
dev_err(dev, "PHY no child node\n");
@@ -154,24 +155,28 @@ static int cygnus_pcie_phy_probe(struct platform_device *pdev)
if (of_property_read_u32(child, "reg", &id)) {
dev_err(dev, "missing reg property for %s\n",
child->name);
- return -EINVAL;
+ ret = -EINVAL;
+ goto put_child;
}
if (id >= MAX_NUM_PHYS) {
dev_err(dev, "invalid PHY id: %u\n", id);
- return -EINVAL;
+ ret = -EINVAL;
+ goto put_child;
}
if (core->phys[id].phy) {
dev_err(dev, "duplicated PHY id: %u\n", id);
- return -EINVAL;
+ ret = -EINVAL;
+ goto put_child;
}
p = &core->phys[id];
p->phy = devm_phy_create(dev, child, &cygnus_pcie_phy_ops);
if (IS_ERR(p->phy)) {
dev_err(dev, "failed to create PHY\n");
- return PTR_ERR(p->phy);
+ ret = PTR_ERR(p->phy);
+ goto put_child;
}
p->core = core;
@@ -191,6 +196,9 @@ static int cygnus_pcie_phy_probe(struct platform_device *pdev)
dev_dbg(dev, "registered %u PCIe PHY(s)\n", cnt);
return 0;
+put_child:
+ of_node_put(child);
+ return ret;
}
static const struct of_device_id cygnus_pcie_phy_match_table[] = {
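
This cygnus fix is the first of several identical leak fixes in the PHY drivers below (berlin-sata, brcmstb-sata, miphy28lp, miphy365x, mt65xx-usb3, rockchip-usb): for_each_child_of_node() holds a reference on `child`, so any early exit from the loop must drop it. A hedged sketch of the pattern, with demo_init_one() as a hypothetical per-child helper:

#include <linux/of.h>

static int demo_init_one(struct device_node *child);	/* hypothetical */

static int demo_probe_children(struct device_node *np)
{
	struct device_node *child;
	int ret = 0;

	for_each_child_of_node(np, child) {
		ret = demo_init_one(child);
		if (ret)
			goto put_child;	/* not a bare `return ret;` */
	}
	return 0;

put_child:
	of_node_put(child);	/* drop the reference the iterator took */
	return ret;
}
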
diff --git a/drivers/phy/phy-berlin-sata.c b/drivers/phy/phy-berlin-sata.c
index 77a2e054fdea..f84a33a1bdd9 100644
--- a/drivers/phy/phy-berlin-sata.c
+++ b/drivers/phy/phy-berlin-sata.c
@@ -195,7 +195,7 @@ static int phy_berlin_sata_probe(struct platform_device *pdev)
struct phy_provider *phy_provider;
struct phy_berlin_priv *priv;
struct resource *res;
- int i = 0;
+ int ret, i = 0;
u32 phy_id;
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -237,22 +237,27 @@ static int phy_berlin_sata_probe(struct platform_device *pdev)
if (of_property_read_u32(child, "reg", &phy_id)) {
dev_err(dev, "missing reg property in node %s\n",
child->name);
- return -EINVAL;
+ ret = -EINVAL;
+ goto put_child;
}
if (phy_id >= ARRAY_SIZE(phy_berlin_power_down_bits)) {
dev_err(dev, "invalid reg in node %s\n", child->name);
- return -EINVAL;
+ ret = -EINVAL;
+ goto put_child;
}
phy_desc = devm_kzalloc(dev, sizeof(*phy_desc), GFP_KERNEL);
- if (!phy_desc)
- return -ENOMEM;
+ if (!phy_desc) {
+ ret = -ENOMEM;
+ goto put_child;
+ }
phy = devm_phy_create(dev, NULL, &phy_berlin_sata_ops);
if (IS_ERR(phy)) {
dev_err(dev, "failed to create PHY %d\n", phy_id);
- return PTR_ERR(phy);
+ ret = PTR_ERR(phy);
+ goto put_child;
}
phy_desc->phy = phy;
@@ -269,6 +274,9 @@ static int phy_berlin_sata_probe(struct platform_device *pdev)
phy_provider =
devm_of_phy_provider_register(dev, phy_berlin_sata_phy_xlate);
return PTR_ERR_OR_ZERO(phy_provider);
+put_child:
+ of_node_put(child);
+ return ret;
}
static const struct of_device_id phy_berlin_sata_of_match[] = {
diff --git a/drivers/phy/phy-brcmstb-sata.c b/drivers/phy/phy-brcmstb-sata.c
index 8a2cb16a1937..cd9dba820566 100644
--- a/drivers/phy/phy-brcmstb-sata.c
+++ b/drivers/phy/phy-brcmstb-sata.c
@@ -140,7 +140,7 @@ static int brcm_sata_phy_probe(struct platform_device *pdev)
struct brcm_sata_phy *priv;
struct resource *res;
struct phy_provider *provider;
- int count = 0;
+ int ret, count = 0;
if (of_get_child_count(dn) == 0)
return -ENODEV;
@@ -163,16 +163,19 @@ static int brcm_sata_phy_probe(struct platform_device *pdev)
if (of_property_read_u32(child, "reg", &id)) {
dev_err(dev, "missing reg property in node %s\n",
child->name);
- return -EINVAL;
+ ret = -EINVAL;
+ goto put_child;
}
if (id >= MAX_PORTS) {
dev_err(dev, "invalid reg: %u\n", id);
- return -EINVAL;
+ ret = -EINVAL;
+ goto put_child;
}
if (priv->phys[id].phy) {
dev_err(dev, "already registered port %u\n", id);
- return -EINVAL;
+ ret = -EINVAL;
+ goto put_child;
}
port = &priv->phys[id];
@@ -182,7 +185,8 @@ static int brcm_sata_phy_probe(struct platform_device *pdev)
port->ssc_en = of_property_read_bool(child, "brcm,enable-ssc");
if (IS_ERR(port->phy)) {
dev_err(dev, "failed to create PHY\n");
- return PTR_ERR(port->phy);
+ ret = PTR_ERR(port->phy);
+ goto put_child;
}
phy_set_drvdata(port->phy, port);
@@ -198,6 +202,9 @@ static int brcm_sata_phy_probe(struct platform_device *pdev)
dev_info(dev, "registered %d port(s)\n", count);
return 0;
+put_child:
+ of_node_put(child);
+ return ret;
}
static struct platform_driver brcm_sata_phy_driver = {
diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c
index fc48fac003a6..8c7f27db6ad3 100644
--- a/drivers/phy/phy-core.c
+++ b/drivers/phy/phy-core.c
@@ -636,8 +636,9 @@ EXPORT_SYMBOL_GPL(devm_of_phy_get);
* @np: node containing the phy
* @index: index of the phy
*
- * Gets the phy using _of_phy_get(), and associates a device with it using
- * devres. On driver detach, release function is invoked on the devres data,
+ * Gets the phy using _of_phy_get(), then gets a refcount to it,
+ * and associates a device with it using devres. On driver detach,
+ * release function is invoked on the devres data,
* then, devres data is freed.
*
*/
@@ -651,13 +652,21 @@ struct phy *devm_of_phy_get_by_index(struct device *dev, struct device_node *np,
return ERR_PTR(-ENOMEM);
phy = _of_phy_get(np, index);
- if (!IS_ERR(phy)) {
- *ptr = phy;
- devres_add(dev, ptr);
- } else {
+ if (IS_ERR(phy)) {
devres_free(ptr);
+ return phy;
}
+ if (!try_module_get(phy->ops->owner)) {
+ devres_free(ptr);
+ return ERR_PTR(-EPROBE_DEFER);
+ }
+
+ get_device(&phy->dev);
+
+ *ptr = phy;
+ devres_add(dev, ptr);
+
return phy;
}
EXPORT_SYMBOL_GPL(devm_of_phy_get_by_index);
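
The devm_of_phy_get_by_index() change above pins both the provider module and the phy's struct device before handing the phy to a devres-managed consumer. A hedged sketch of that refcounting step in isolation, assuming the release path does the matching module_put()/put_device():

static struct phy *demo_pin_phy(struct phy *phy)
{
	/* provider module may be unloading; treat as "not ready yet" */
	if (!try_module_get(phy->ops->owner))
		return ERR_PTR(-EPROBE_DEFER);

	get_device(&phy->dev);	/* paired with put_device() on release */
	return phy;
}
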
diff --git a/drivers/phy/phy-miphy28lp.c b/drivers/phy/phy-miphy28lp.c
index c47b56b4a2b8..3acd2a1808df 100644
--- a/drivers/phy/phy-miphy28lp.c
+++ b/drivers/phy/phy-miphy28lp.c
@@ -1226,15 +1226,18 @@ static int miphy28lp_probe(struct platform_device *pdev)
miphy_phy = devm_kzalloc(&pdev->dev, sizeof(*miphy_phy),
GFP_KERNEL);
- if (!miphy_phy)
- return -ENOMEM;
+ if (!miphy_phy) {
+ ret = -ENOMEM;
+ goto put_child;
+ }
miphy_dev->phys[port] = miphy_phy;
phy = devm_phy_create(&pdev->dev, child, &miphy28lp_ops);
if (IS_ERR(phy)) {
dev_err(&pdev->dev, "failed to create PHY\n");
- return PTR_ERR(phy);
+ ret = PTR_ERR(phy);
+ goto put_child;
}
miphy_dev->phys[port]->phy = phy;
@@ -1242,11 +1245,11 @@ static int miphy28lp_probe(struct platform_device *pdev)
ret = miphy28lp_of_probe(child, miphy_phy);
if (ret)
- return ret;
+ goto put_child;
ret = miphy28lp_probe_resets(child, miphy_dev->phys[port]);
if (ret)
- return ret;
+ goto put_child;
phy_set_drvdata(phy, miphy_dev->phys[port]);
port++;
@@ -1255,6 +1258,9 @@ static int miphy28lp_probe(struct platform_device *pdev)
provider = devm_of_phy_provider_register(&pdev->dev, miphy28lp_xlate);
return PTR_ERR_OR_ZERO(provider);
+put_child:
+ of_node_put(child);
+ return ret;
}
static const struct of_device_id miphy28lp_of_match[] = {
diff --git a/drivers/phy/phy-miphy365x.c b/drivers/phy/phy-miphy365x.c
index 00a686a073ed..e661f3b36eaa 100644
--- a/drivers/phy/phy-miphy365x.c
+++ b/drivers/phy/phy-miphy365x.c
@@ -566,22 +566,25 @@ static int miphy365x_probe(struct platform_device *pdev)
miphy_phy = devm_kzalloc(&pdev->dev, sizeof(*miphy_phy),
GFP_KERNEL);
- if (!miphy_phy)
- return -ENOMEM;
+ if (!miphy_phy) {
+ ret = -ENOMEM;
+ goto put_child;
+ }
miphy_dev->phys[port] = miphy_phy;
phy = devm_phy_create(&pdev->dev, child, &miphy365x_ops);
if (IS_ERR(phy)) {
dev_err(&pdev->dev, "failed to create PHY\n");
- return PTR_ERR(phy);
+ ret = PTR_ERR(phy);
+ goto put_child;
}
miphy_dev->phys[port]->phy = phy;
ret = miphy365x_of_probe(child, miphy_phy);
if (ret)
- return ret;
+ goto put_child;
phy_set_drvdata(phy, miphy_dev->phys[port]);
@@ -591,12 +594,15 @@ static int miphy365x_probe(struct platform_device *pdev)
&miphy_phy->ctrlreg);
if (ret) {
dev_err(&pdev->dev, "No sysconfig offset found\n");
- return ret;
+ goto put_child;
}
}
provider = devm_of_phy_provider_register(&pdev->dev, miphy365x_xlate);
return PTR_ERR_OR_ZERO(provider);
+put_child:
+ of_node_put(child);
+ return ret;
}
static const struct of_device_id miphy365x_of_match[] = {
diff --git a/drivers/phy/phy-mt65xx-usb3.c b/drivers/phy/phy-mt65xx-usb3.c
index f30b28bd41fe..e427c3b788ff 100644
--- a/drivers/phy/phy-mt65xx-usb3.c
+++ b/drivers/phy/phy-mt65xx-usb3.c
@@ -415,7 +415,7 @@ static int mt65xx_u3phy_probe(struct platform_device *pdev)
struct resource *sif_res;
struct mt65xx_u3phy *u3phy;
struct resource res;
- int port;
+ int port, retval;
u3phy = devm_kzalloc(dev, sizeof(*u3phy), GFP_KERNEL);
if (!u3phy)
@@ -447,31 +447,34 @@ static int mt65xx_u3phy_probe(struct platform_device *pdev)
for_each_child_of_node(np, child_np) {
struct mt65xx_phy_instance *instance;
struct phy *phy;
- int retval;
instance = devm_kzalloc(dev, sizeof(*instance), GFP_KERNEL);
- if (!instance)
- return -ENOMEM;
+ if (!instance) {
+ retval = -ENOMEM;
+ goto put_child;
+ }
u3phy->phys[port] = instance;
phy = devm_phy_create(dev, child_np, &mt65xx_u3phy_ops);
if (IS_ERR(phy)) {
dev_err(dev, "failed to create phy\n");
- return PTR_ERR(phy);
+ retval = PTR_ERR(phy);
+ goto put_child;
}
retval = of_address_to_resource(child_np, 0, &res);
if (retval) {
dev_err(dev, "failed to get address resource(id-%d)\n",
port);
- return retval;
+ goto put_child;
}
instance->port_base = devm_ioremap_resource(&phy->dev, &res);
if (IS_ERR(instance->port_base)) {
dev_err(dev, "failed to remap phy regs\n");
- return PTR_ERR(instance->port_base);
+ retval = PTR_ERR(instance->port_base);
+ goto put_child;
}
instance->phy = phy;
@@ -483,6 +486,9 @@ static int mt65xx_u3phy_probe(struct platform_device *pdev)
provider = devm_of_phy_provider_register(dev, mt65xx_phy_xlate);
return PTR_ERR_OR_ZERO(provider);
+put_child:
+ of_node_put(child_np);
+ return retval;
}
static const struct of_device_id mt65xx_u3phy_id_table[] = {
diff --git a/drivers/phy/phy-rockchip-usb.c b/drivers/phy/phy-rockchip-usb.c
index 91d6f342c565..62c43c435194 100644
--- a/drivers/phy/phy-rockchip-usb.c
+++ b/drivers/phy/phy-rockchip-usb.c
@@ -108,13 +108,16 @@ static int rockchip_usb_phy_probe(struct platform_device *pdev)
for_each_available_child_of_node(dev->of_node, child) {
rk_phy = devm_kzalloc(dev, sizeof(*rk_phy), GFP_KERNEL);
- if (!rk_phy)
- return -ENOMEM;
+ if (!rk_phy) {
+ err = -ENOMEM;
+ goto put_child;
+ }
if (of_property_read_u32(child, "reg", &reg_offset)) {
dev_err(dev, "missing reg property in node %s\n",
child->name);
- return -EINVAL;
+ err = -EINVAL;
+ goto put_child;
}
rk_phy->reg_offset = reg_offset;
@@ -127,18 +130,22 @@ static int rockchip_usb_phy_probe(struct platform_device *pdev)
rk_phy->phy = devm_phy_create(dev, child, &ops);
if (IS_ERR(rk_phy->phy)) {
dev_err(dev, "failed to create PHY\n");
- return PTR_ERR(rk_phy->phy);
+ err = PTR_ERR(rk_phy->phy);
+ goto put_child;
}
phy_set_drvdata(rk_phy->phy, rk_phy);
/* only power up the usb phy when it is in use, so disable it at init */
err = rockchip_usb_phy_power(rk_phy, 1);
if (err)
- return err;
+ goto put_child;
}
phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
return PTR_ERR_OR_ZERO(phy_provider);
+put_child:
+ of_node_put(child);
+ return err;
}
static const struct of_device_id rockchip_usb_phy_dt_ids[] = {
diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig
index b422e4ed73f4..312c78b27a32 100644
--- a/drivers/pinctrl/Kconfig
+++ b/drivers/pinctrl/Kconfig
@@ -5,8 +5,6 @@
config PINCTRL
bool
-if PINCTRL
-
menu "Pin controllers"
depends on PINCTRL
@@ -274,5 +272,3 @@ config PINCTRL_TB10X
select GPIOLIB
endmenu
-
-endif
diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
index a1ea565fcd46..2e6ca69635aa 100644
--- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c
+++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
@@ -342,12 +342,6 @@ static int bcm2835_gpio_get(struct gpio_chip *chip, unsigned offset)
return bcm2835_gpio_get_bit(pc, GPLEV0, offset);
}
-static int bcm2835_gpio_direction_output(struct gpio_chip *chip,
- unsigned offset, int value)
-{
- return pinctrl_gpio_direction_output(chip->base + offset);
-}
-
static void bcm2835_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
{
struct bcm2835_pinctrl *pc = dev_get_drvdata(chip->dev);
@@ -355,6 +349,13 @@ static void bcm2835_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
bcm2835_gpio_set_bit(pc, value ? GPSET0 : GPCLR0, offset);
}
+static int bcm2835_gpio_direction_output(struct gpio_chip *chip,
+ unsigned offset, int value)
+{
+ bcm2835_gpio_set(chip, offset, value);
+ return pinctrl_gpio_direction_output(chip->base + offset);
+}
+
static int bcm2835_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
{
struct bcm2835_pinctrl *pc = dev_get_drvdata(chip->dev);
diff --git a/drivers/pinctrl/freescale/pinctrl-imx1-core.c b/drivers/pinctrl/freescale/pinctrl-imx1-core.c
index 88a7fac11bd4..acaf84cadca3 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx1-core.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx1-core.c
@@ -538,8 +538,10 @@ static int imx1_pinctrl_parse_functions(struct device_node *np,
func->groups[i] = child->name;
grp = &info->groups[grp_index++];
ret = imx1_pinctrl_parse_groups(child, grp, info, i++);
- if (ret == -ENOMEM)
+ if (ret == -ENOMEM) {
+ of_node_put(child);
return ret;
+ }
}
return 0;
@@ -582,8 +584,10 @@ static int imx1_pinctrl_parse_dt(struct platform_device *pdev,
for_each_child_of_node(np, child) {
ret = imx1_pinctrl_parse_functions(child, info, ifunc++);
- if (ret == -ENOMEM)
+ if (ret == -ENOMEM) {
+ of_node_put(child);
return -ENOMEM;
+ }
}
return 0;
diff --git a/drivers/pinctrl/freescale/pinctrl-vf610.c b/drivers/pinctrl/freescale/pinctrl-vf610.c
index 37a037543d29..587d1ff6210e 100644
--- a/drivers/pinctrl/freescale/pinctrl-vf610.c
+++ b/drivers/pinctrl/freescale/pinctrl-vf610.c
@@ -299,7 +299,7 @@ static const struct pinctrl_pin_desc vf610_pinctrl_pads[] = {
static struct imx_pinctrl_soc_info vf610_pinctrl_info = {
.pins = vf610_pinctrl_pads,
.npins = ARRAY_SIZE(vf610_pinctrl_pads),
- .flags = SHARE_MUX_CONF_REG,
+ .flags = SHARE_MUX_CONF_REG | ZERO_OFFSET_VALID,
};
static const struct of_device_id vf610_pinctrl_of_match[] = {
diff --git a/drivers/pinctrl/intel/pinctrl-broxton.c b/drivers/pinctrl/intel/pinctrl-broxton.c
index e42d5d4183f5..5979d38c46b2 100644
--- a/drivers/pinctrl/intel/pinctrl-broxton.c
+++ b/drivers/pinctrl/intel/pinctrl-broxton.c
@@ -28,6 +28,7 @@
.padcfglock_offset = BXT_PADCFGLOCK, \
.hostown_offset = BXT_HOSTSW_OWN, \
.ie_offset = BXT_GPI_IE, \
+ .gpp_size = 32, \
.pin_base = (s), \
.npins = ((e) - (s) + 1), \
}
diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
index 392e28d3f48d..26f6b6ffea5b 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c
@@ -25,9 +25,6 @@
#include "pinctrl-intel.h"
-/* Maximum number of pads in each group */
-#define NPADS_IN_GPP 24
-
/* Offset from regs */
#define PADBAR 0x00c
#define GPI_IS 0x100
@@ -37,6 +34,7 @@
#define PADOWN_BITS 4
#define PADOWN_SHIFT(p) ((p) % 8 * PADOWN_BITS)
#define PADOWN_MASK(p) (0xf << PADOWN_SHIFT(p))
+#define PADOWN_GPP(p) ((p) / 8)
/* Offset from pad_regs */
#define PADCFG0 0x000
@@ -142,7 +140,7 @@ static void __iomem *intel_get_padcfg(struct intel_pinctrl *pctrl, unsigned pin,
static bool intel_pad_owned_by_host(struct intel_pinctrl *pctrl, unsigned pin)
{
const struct intel_community *community;
- unsigned padno, gpp, gpp_offset, offset;
+ unsigned padno, gpp, offset, group;
void __iomem *padown;
community = intel_get_community(pctrl, pin);
@@ -152,9 +150,9 @@ static bool intel_pad_owned_by_host(struct intel_pinctrl *pctrl, unsigned pin)
return true;
padno = pin_to_padno(community, pin);
- gpp = padno / NPADS_IN_GPP;
- gpp_offset = padno % NPADS_IN_GPP;
- offset = community->padown_offset + gpp * 16 + (gpp_offset / 8) * 4;
+ group = padno / community->gpp_size;
+ gpp = PADOWN_GPP(padno % community->gpp_size);
+ offset = community->padown_offset + 0x10 * group + gpp * 4;
padown = community->regs + offset;
return !(readl(padown) & PADOWN_MASK(padno));
@@ -173,11 +171,11 @@ static bool intel_pad_acpi_mode(struct intel_pinctrl *pctrl, unsigned pin)
return false;
padno = pin_to_padno(community, pin);
- gpp = padno / NPADS_IN_GPP;
+ gpp = padno / community->gpp_size;
offset = community->hostown_offset + gpp * 4;
hostown = community->regs + offset;
- return !(readl(hostown) & BIT(padno % NPADS_IN_GPP));
+ return !(readl(hostown) & BIT(padno % community->gpp_size));
}
static bool intel_pad_locked(struct intel_pinctrl *pctrl, unsigned pin)
@@ -193,7 +191,7 @@ static bool intel_pad_locked(struct intel_pinctrl *pctrl, unsigned pin)
return false;
padno = pin_to_padno(community, pin);
- gpp = padno / NPADS_IN_GPP;
+ gpp = padno / community->gpp_size;
/*
* If PADCFGLOCK and PADCFGLOCKTX bits are both clear for this pad,
@@ -202,12 +200,12 @@ static bool intel_pad_locked(struct intel_pinctrl *pctrl, unsigned pin)
*/
offset = community->padcfglock_offset + gpp * 8;
value = readl(community->regs + offset);
- if (value & BIT(pin % NPADS_IN_GPP))
+ if (value & BIT(pin % community->gpp_size))
return true;
offset = community->padcfglock_offset + 4 + gpp * 8;
value = readl(community->regs + offset);
- if (value & BIT(pin % NPADS_IN_GPP))
+ if (value & BIT(pin % community->gpp_size))
return true;
return false;
@@ -663,8 +661,8 @@ static void intel_gpio_irq_ack(struct irq_data *d)
community = intel_get_community(pctrl, pin);
if (community) {
unsigned padno = pin_to_padno(community, pin);
- unsigned gpp_offset = padno % NPADS_IN_GPP;
- unsigned gpp = padno / NPADS_IN_GPP;
+ unsigned gpp_offset = padno % community->gpp_size;
+ unsigned gpp = padno / community->gpp_size;
writel(BIT(gpp_offset), community->regs + GPI_IS + gpp * 4);
}
@@ -685,8 +683,8 @@ static void intel_gpio_irq_mask_unmask(struct irq_data *d, bool mask)
community = intel_get_community(pctrl, pin);
if (community) {
unsigned padno = pin_to_padno(community, pin);
- unsigned gpp_offset = padno % NPADS_IN_GPP;
- unsigned gpp = padno / NPADS_IN_GPP;
+ unsigned gpp_offset = padno % community->gpp_size;
+ unsigned gpp = padno / community->gpp_size;
void __iomem *reg;
u32 value;
@@ -780,8 +778,8 @@ static int intel_gpio_irq_wake(struct irq_data *d, unsigned int on)
return -EINVAL;
padno = pin_to_padno(community, pin);
- gpp = padno / NPADS_IN_GPP;
- gpp_offset = padno % NPADS_IN_GPP;
+ gpp = padno / community->gpp_size;
+ gpp_offset = padno % community->gpp_size;
/* Clear the existing wake status */
writel(BIT(gpp_offset), community->regs + GPI_GPE_STS + gpp * 4);
@@ -819,14 +817,14 @@ static irqreturn_t intel_gpio_community_irq_handler(struct intel_pinctrl *pctrl,
/* Only interrupts that are enabled */
pending &= enabled;
- for_each_set_bit(gpp_offset, &pending, NPADS_IN_GPP) {
+ for_each_set_bit(gpp_offset, &pending, community->gpp_size) {
unsigned padno, irq;
/*
* The last group in the community can have fewer pins
* than community->gpp_size.
*/
- padno = gpp_offset + gpp * NPADS_IN_GPP;
+ padno = gpp_offset + gpp * community->gpp_size;
if (padno >= community->npins)
break;
@@ -1002,7 +1000,8 @@ int intel_pinctrl_probe(struct platform_device *pdev,
community->regs = regs;
community->pad_regs = regs + padbar;
- community->ngpps = DIV_ROUND_UP(community->npins, NPADS_IN_GPP);
+ community->ngpps = DIV_ROUND_UP(community->npins,
+ community->gpp_size);
}
irq = platform_get_irq(pdev, 0);
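
The intel pinctrl rework above replaces the hard-coded 24-pad group with a per-community gpp_size (32 in the broxton hunk above, 24 in the sunrisepoint hunk below). The PADOWN offset arithmetic can be checked standalone:

#include <stdio.h>

#define PADOWN_GPP(p)	((p) / 8)

static unsigned int padown_offset(unsigned int base, unsigned int padno,
				  unsigned int gpp_size)
{
	unsigned int group = padno / gpp_size;
	unsigned int gpp = PADOWN_GPP(padno % gpp_size);

	return base + 0x10 * group + gpp * 4;
}

int main(void)
{
	/* pad 40 with 32-pad groups: group 1, word 1 -> base + 0x14 */
	printf("0x%x\n", padown_offset(0, 40, 32));
	return 0;
}
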
diff --git a/drivers/pinctrl/intel/pinctrl-intel.h b/drivers/pinctrl/intel/pinctrl-intel.h
index 4ec8b572a288..b60215793017 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.h
+++ b/drivers/pinctrl/intel/pinctrl-intel.h
@@ -55,6 +55,8 @@ struct intel_function {
* ACPI).
* @ie_offset: Register offset of GPI_IE from @regs.
* @pin_base: Starting pin of pins in this community
+ * @gpp_size: Maximum number of pads in each group, such as PADCFGLOCK,
+ * HOSTSW_OWN, GPI_IS, GPI_IE, etc.
* @npins: Number of pins in this community
* @regs: Community specific common registers (reserved for core driver)
* @pad_regs: Community specific pad registers (reserved for core driver)
@@ -68,6 +70,7 @@ struct intel_community {
unsigned hostown_offset;
unsigned ie_offset;
unsigned pin_base;
+ unsigned gpp_size;
size_t npins;
void __iomem *regs;
void __iomem *pad_regs;
diff --git a/drivers/pinctrl/intel/pinctrl-sunrisepoint.c b/drivers/pinctrl/intel/pinctrl-sunrisepoint.c
index 1de9ae5010db..c725a5313b4e 100644
--- a/drivers/pinctrl/intel/pinctrl-sunrisepoint.c
+++ b/drivers/pinctrl/intel/pinctrl-sunrisepoint.c
@@ -30,6 +30,7 @@
.padcfglock_offset = SPT_PADCFGLOCK, \
.hostown_offset = SPT_HOSTSW_OWN, \
.ie_offset = SPT_GPI_IE, \
+ .gpp_size = 24, \
.pin_base = (s), \
.npins = ((e) - (s) + 1), \
}
diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
index f307f1d27d64..5c717275a7fa 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
@@ -747,7 +747,7 @@ static int mtk_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
reg_addr = mtk_get_port(pctl, offset) + pctl->devdata->dir_offset;
bit = BIT(offset & 0xf);
regmap_read(pctl->regmap1, reg_addr, &read_val);
- return !!(read_val & bit);
+ return !(read_val & bit);
}
static int mtk_gpio_get(struct gpio_chip *chip, unsigned offset)
@@ -757,12 +757,8 @@ static int mtk_gpio_get(struct gpio_chip *chip, unsigned offset)
unsigned int read_val = 0;
struct mtk_pinctrl *pctl = dev_get_drvdata(chip->dev);
- if (mtk_gpio_get_direction(chip, offset))
- reg_addr = mtk_get_port(pctl, offset) +
- pctl->devdata->dout_offset;
- else
- reg_addr = mtk_get_port(pctl, offset) +
- pctl->devdata->din_offset;
+ reg_addr = mtk_get_port(pctl, offset) +
+ pctl->devdata->din_offset;
bit = BIT(offset & 0xf);
regmap_read(pctl->regmap1, reg_addr, &read_val);
@@ -997,6 +993,7 @@ static struct gpio_chip mtk_gpio_chip = {
.owner = THIS_MODULE,
.request = gpiochip_generic_request,
.free = gpiochip_generic_free,
+ .get_direction = mtk_gpio_get_direction,
.direction_input = mtk_gpio_direction_input,
.direction_output = mtk_gpio_direction_output,
.get = mtk_gpio_get,
diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
index d809c9eaa323..19a3c3bc2f1f 100644
--- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
@@ -672,7 +672,7 @@ static int pm8xxx_gpio_probe(struct platform_device *pdev)
return -ENOMEM;
pctrl->dev = &pdev->dev;
- pctrl->npins = (unsigned)of_device_get_match_data(&pdev->dev);
+ pctrl->npins = (unsigned long)of_device_get_match_data(&pdev->dev);
pctrl->regmap = dev_get_regmap(pdev->dev.parent, NULL);
if (!pctrl->regmap) {
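
The cast fix above (repeated in pinctrl-ssbi-mpp.c just below) avoids truncating the pointer returned by of_device_get_match_data() through `unsigned int` on 64-bit builds. A standalone demonstration with a hypothetical 64-bit value:

#include <stdio.h>

int main(void)
{
	/* stand-in for a pointer-sized match-data value on a 64-bit box */
	const void *data = (const void *)0x100000004UL;

	unsigned int bad = (unsigned int)(unsigned long)data;
	unsigned long good = (unsigned long)data;

	printf("bad=%u good=%lu\n", bad, good);	/* bad drops the high bits */
	return 0;
}
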
diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
index 8982027de8e8..b868ef1766a0 100644
--- a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
+++ b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
@@ -763,7 +763,7 @@ static int pm8xxx_mpp_probe(struct platform_device *pdev)
return -ENOMEM;
pctrl->dev = &pdev->dev;
- pctrl->npins = (unsigned)of_device_get_match_data(&pdev->dev);
+ pctrl->npins = (unsigned long)of_device_get_match_data(&pdev->dev);
pctrl->regmap = dev_get_regmap(pdev->dev.parent, NULL);
if (!pctrl->regmap) {
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7734.c b/drivers/pinctrl/sh-pfc/pfc-sh7734.c
index e7deb51de7dc..9842bb106796 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh7734.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh7734.c
@@ -31,11 +31,11 @@
PORT_GP_12(5, fn, sfx)
#undef _GP_DATA
-#define _GP_DATA(bank, pin, name, sfx) \
+#define _GP_DATA(bank, pin, name, sfx, cfg) \
PINMUX_DATA(name##_DATA, name##_FN, name##_IN, name##_OUT)
-#define _GP_INOUTSEL(bank, pin, name, sfx) name##_IN, name##_OUT
-#define _GP_INDT(bank, pin, name, sfx) name##_DATA
+#define _GP_INOUTSEL(bank, pin, name, sfx, cfg) name##_IN, name##_OUT
+#define _GP_INDT(bank, pin, name, sfx, cfg) name##_DATA
#define GP_INOUTSEL(bank) PORT_GP_32_REV(bank, _GP_INOUTSEL, unused)
#define GP_INDT(bank) PORT_GP_32_REV(bank, _GP_INDT, unused)
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index cc97f0869791..48747c28a43d 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -1341,10 +1341,13 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu)
for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
/* check if the domain is locked by BIOS */
- if (rapl_read_data_raw(rd, FW_LOCK, false, &locked)) {
+ ret = rapl_read_data_raw(rd, FW_LOCK, false, &locked);
+ if (ret)
+ return ret;
+ if (locked) {
pr_info("RAPL package %d domain %s locked by BIOS\n",
rp->id, rd->name);
- rd->state |= DOMAIN_STATE_BIOS_LOCKED;
+ rd->state |= DOMAIN_STATE_BIOS_LOCKED;
}
}
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 8b3130f22b42..9e03d158f411 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -1478,6 +1478,8 @@ module_init(remoteproc_init);
static void __exit remoteproc_exit(void)
{
+ ida_destroy(&rproc_dev_index);
+
rproc_exit_debugfs();
}
module_exit(remoteproc_exit);
diff --git a/drivers/remoteproc/remoteproc_debugfs.c b/drivers/remoteproc/remoteproc_debugfs.c
index 9d30809bb407..916af5096f57 100644
--- a/drivers/remoteproc/remoteproc_debugfs.c
+++ b/drivers/remoteproc/remoteproc_debugfs.c
@@ -156,7 +156,7 @@ rproc_recovery_write(struct file *filp, const char __user *user_buf,
char buf[10];
int ret;
- if (count > sizeof(buf))
+ if (count < 1 || count > sizeof(buf))
return count;
ret = copy_from_user(buf, user_buf, count);
diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c
index 284b587da65c..d6c853bbfa9f 100644
--- a/drivers/rtc/rtc-da9063.c
+++ b/drivers/rtc/rtc-da9063.c
@@ -483,24 +483,23 @@ static int da9063_rtc_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, rtc);
+ rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, DA9063_DRVNAME_RTC,
+ &da9063_rtc_ops, THIS_MODULE);
+ if (IS_ERR(rtc->rtc_dev))
+ return PTR_ERR(rtc->rtc_dev);
+
+ da9063_data_to_tm(data, &rtc->alarm_time, rtc);
+ rtc->rtc_sync = false;
+
irq_alarm = platform_get_irq_byname(pdev, "ALARM");
ret = devm_request_threaded_irq(&pdev->dev, irq_alarm, NULL,
da9063_alarm_event,
IRQF_TRIGGER_LOW | IRQF_ONESHOT,
"ALARM", rtc);
- if (ret) {
+ if (ret)
dev_err(&pdev->dev, "Failed to request ALARM IRQ %d: %d\n",
irq_alarm, ret);
- return ret;
- }
-
- rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, DA9063_DRVNAME_RTC,
- &da9063_rtc_ops, THIS_MODULE);
- if (IS_ERR(rtc->rtc_dev))
- return PTR_ERR(rtc->rtc_dev);
- da9063_data_to_tm(data, &rtc->alarm_time, rtc);
- rtc->rtc_sync = false;
return ret;
}
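
The da9063 reorder above registers the rtc device and fills in alarm_time before the ALARM interrupt is requested, so the threaded handler can never run against an unregistered device. A hedged sketch of that ordering rule (demo_* names and ops are hypothetical):

static int demo_rtc_probe(struct platform_device *pdev)
{
	struct demo_rtc *rtc = platform_get_drvdata(pdev);
	int ret;

	/* 1. make the device fully usable first... */
	rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, "demo",
						&demo_rtc_ops, THIS_MODULE);
	if (IS_ERR(rtc->rtc_dev))
		return PTR_ERR(rtc->rtc_dev);

	/* 2. ...only then enable the interrupt that references it */
	ret = devm_request_threaded_irq(&pdev->dev, rtc->irq, NULL,
					demo_alarm_irq, IRQF_ONESHOT,
					"demo-alarm", rtc);
	if (ret)
		dev_err(&pdev->dev, "failed to request alarm IRQ: %d\n", ret);

	return ret;
}
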
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 188006c55ce0..aa705bb4748c 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -15,9 +15,6 @@
#include <linux/i2c.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
-#include <linux/pm_wakeirq.h>
#include <linux/rtc/ds1307.h>
#include <linux/rtc.h>
#include <linux/slab.h>
@@ -117,7 +114,6 @@ struct ds1307 {
#define HAS_ALARM 1 /* bit 1 == irq claimed */
struct i2c_client *client;
struct rtc_device *rtc;
- int wakeirq;
s32 (*read_block_data)(const struct i2c_client *client, u8 command,
u8 length, u8 *values);
s32 (*write_block_data)(const struct i2c_client *client, u8 command,
@@ -1138,7 +1134,10 @@ read_rtc:
bin2bcd(tmp));
}
- device_set_wakeup_capable(&client->dev, want_irq);
+ if (want_irq) {
+ device_set_wakeup_capable(&client->dev, true);
+ set_bit(HAS_ALARM, &ds1307->flags);
+ }
ds1307->rtc = devm_rtc_device_register(&client->dev, client->name,
rtc_ops, THIS_MODULE);
if (IS_ERR(ds1307->rtc)) {
@@ -1146,43 +1145,19 @@ read_rtc:
}
if (want_irq) {
- struct device_node *node = client->dev.of_node;
-
err = devm_request_threaded_irq(&client->dev,
client->irq, NULL, irq_handler,
IRQF_SHARED | IRQF_ONESHOT,
ds1307->rtc->name, client);
if (err) {
client->irq = 0;
+ device_set_wakeup_capable(&client->dev, false);
+ clear_bit(HAS_ALARM, &ds1307->flags);
dev_err(&client->dev, "unable to request IRQ!\n");
- goto no_irq;
- }
-
- set_bit(HAS_ALARM, &ds1307->flags);
- dev_dbg(&client->dev, "got IRQ %d\n", client->irq);
-
- /* Currently supported by OF code only! */
- if (!node)
- goto no_irq;
-
- err = of_irq_get(node, 1);
- if (err <= 0) {
- if (err == -EPROBE_DEFER)
- goto exit;
- goto no_irq;
- }
- ds1307->wakeirq = err;
-
- err = dev_pm_set_dedicated_wake_irq(&client->dev,
- ds1307->wakeirq);
- if (err) {
- dev_err(&client->dev, "unable to setup wakeIRQ %d!\n",
- err);
- goto exit;
- }
+ } else
+ dev_dbg(&client->dev, "got IRQ %d\n", client->irq);
}
-no_irq:
if (chip->nvram_size) {
ds1307->nvram = devm_kzalloc(&client->dev,
@@ -1226,9 +1201,6 @@ static int ds1307_remove(struct i2c_client *client)
{
struct ds1307 *ds1307 = i2c_get_clientdata(client);
- if (ds1307->wakeirq)
- dev_pm_clear_wake_irq(&client->dev);
-
if (test_and_clear_bit(HAS_NVRAM, &ds1307->flags))
sysfs_remove_bin_file(&client->dev.kobj, ds1307->nvram);
diff --git a/drivers/rtc/rtc-rk808.c b/drivers/rtc/rtc-rk808.c
index 91ca0bc1b484..35c9aada07c8 100644
--- a/drivers/rtc/rtc-rk808.c
+++ b/drivers/rtc/rtc-rk808.c
@@ -56,6 +56,42 @@ struct rk808_rtc {
int irq;
};
+/*
+ * The Rockchip calendar used by the RK808 counts November with 31 days. We use
+ * these translation functions to convert its dates to/from the Gregorian
+ * calendar used by the rest of the world. We arbitrarily define Jan 1st, 2016
+ * as the day when both calendars were in sync, and treat all other dates
+ * relative to that.
+ * NOTE: Other system software (e.g. firmware) that reads the same hardware must
+ * implement this exact same conversion algorithm, with the same anchor date.
+ */
+static time64_t nov2dec_transitions(struct rtc_time *tm)
+{
+ return (tm->tm_year + 1900) - 2016 + (tm->tm_mon + 1 > 11 ? 1 : 0);
+}
+
+static void rockchip_to_gregorian(struct rtc_time *tm)
+{
+ /* If it's Nov 31st, rtc_tm_to_time64() will count it as Dec 1st */
+ time64_t time = rtc_tm_to_time64(tm);
+ rtc_time64_to_tm(time + nov2dec_transitions(tm) * 86400, tm);
+}
+
+static void gregorian_to_rockchip(struct rtc_time *tm)
+{
+ time64_t extra_days = nov2dec_transitions(tm);
+ time64_t time = rtc_tm_to_time64(tm);
+ rtc_time64_to_tm(time - extra_days * 86400, tm);
+
+ /* Compensate if we went back over Nov 31st (will work up to 2381) */
+ if (nov2dec_transitions(tm) < extra_days) {
+ if (tm->tm_mon + 1 == 11)
+ tm->tm_mday++; /* This may result in 31! */
+ else
+ rtc_time64_to_tm(time - (extra_days - 1) * 86400, tm);
+ }
+}
+
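
The conversion helpers above are worth a worked example. Below is a hedged, standalone walk-through using timegm() (GNU/BSD) as a stand-in for rtc_tm_to_time64(): Gregorian Dec 1st, 2016 lands one nov2dec transition past the anchor date, and stepping back one day then re-applying the Nov-31st compensation yields the Rockchip date "2016-11-31":

#define _GNU_SOURCE
#include <stdio.h>
#include <time.h>

static long nov2dec_transitions(const struct tm *tm)
{
	return (tm->tm_year + 1900) - 2016 + (tm->tm_mon + 1 > 11 ? 1 : 0);
}

int main(void)
{
	struct tm tm = { .tm_year = 116, .tm_mon = 11, .tm_mday = 1 };
	long extra = nov2dec_transitions(&tm);		/* 1 transition */
	time_t t = timegm(&tm) - extra * 86400;		/* back one day */

	gmtime_r(&t, &tm);				/* now Nov 30th */
	if (nov2dec_transitions(&tm) < extra && tm.tm_mon + 1 == 11)
		tm.tm_mday++;				/* Nov 30 -> "Nov 31" */

	printf("rockchip date: %04d-%02d-%02d\n",
	       tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday);
	return 0;
}
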
/* Read current time and date in RTC */
static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm)
{
@@ -101,9 +137,10 @@ static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm)
tm->tm_mon = (bcd2bin(rtc_data[4] & MONTHS_REG_MSK)) - 1;
tm->tm_year = (bcd2bin(rtc_data[5] & YEARS_REG_MSK)) + 100;
tm->tm_wday = bcd2bin(rtc_data[6] & WEEKS_REG_MSK);
+ rockchip_to_gregorian(tm);
dev_dbg(dev, "RTC date/time %4d-%02d-%02d(%d) %02d:%02d:%02d\n",
1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
- tm->tm_wday, tm->tm_hour , tm->tm_min, tm->tm_sec);
+ tm->tm_wday, tm->tm_hour, tm->tm_min, tm->tm_sec);
return ret;
}
@@ -116,6 +153,10 @@ static int rk808_rtc_set_time(struct device *dev, struct rtc_time *tm)
u8 rtc_data[NUM_TIME_REGS];
int ret;
+ dev_dbg(dev, "set RTC date/time %4d-%02d-%02d(%d) %02d:%02d:%02d\n",
+ 1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
+ tm->tm_wday, tm->tm_hour, tm->tm_min, tm->tm_sec);
+ gregorian_to_rockchip(tm);
rtc_data[0] = bin2bcd(tm->tm_sec);
rtc_data[1] = bin2bcd(tm->tm_min);
rtc_data[2] = bin2bcd(tm->tm_hour);
@@ -123,9 +164,6 @@ static int rk808_rtc_set_time(struct device *dev, struct rtc_time *tm)
rtc_data[4] = bin2bcd(tm->tm_mon + 1);
rtc_data[5] = bin2bcd(tm->tm_year - 100);
rtc_data[6] = bin2bcd(tm->tm_wday);
- dev_dbg(dev, "set RTC date/time %4d-%02d-%02d(%d) %02d:%02d:%02d\n",
- 1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
- tm->tm_wday, tm->tm_hour , tm->tm_min, tm->tm_sec);
/* Stop RTC while updating the RTC registers */
ret = regmap_update_bits(rk808->regmap, RK808_RTC_CTRL_REG,
@@ -170,6 +208,7 @@ static int rk808_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm)
alrm->time.tm_mday = bcd2bin(alrm_data[3] & DAYS_REG_MSK);
alrm->time.tm_mon = (bcd2bin(alrm_data[4] & MONTHS_REG_MSK)) - 1;
alrm->time.tm_year = (bcd2bin(alrm_data[5] & YEARS_REG_MSK)) + 100;
+ rockchip_to_gregorian(&alrm->time);
ret = regmap_read(rk808->regmap, RK808_RTC_INT_REG, &int_reg);
if (ret) {
@@ -227,6 +266,7 @@ static int rk808_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
alrm->time.tm_mday, alrm->time.tm_wday, alrm->time.tm_hour,
alrm->time.tm_min, alrm->time.tm_sec);
+ gregorian_to_rockchip(&alrm->time);
alrm_data[0] = bin2bcd(alrm->time.tm_sec);
alrm_data[1] = bin2bcd(alrm->time.tm_min);
alrm_data[2] = bin2bcd(alrm->time.tm_hour);
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 548a18916a31..a831d18596a5 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -1080,28 +1080,10 @@ void __init chsc_init_cleanup(void)
free_page((unsigned long)sei_page);
}
-int chsc_enable_facility(int operation_code)
+int __chsc_enable_facility(struct chsc_sda_area *sda_area, int operation_code)
{
- unsigned long flags;
int ret;
- struct {
- struct chsc_header request;
- u8 reserved1:4;
- u8 format:4;
- u8 reserved2;
- u16 operation_code;
- u32 reserved3;
- u32 reserved4;
- u32 operation_data_area[252];
- struct chsc_header response;
- u32 reserved5:4;
- u32 format2:4;
- u32 reserved6:24;
- } __attribute__ ((packed)) *sda_area;
- spin_lock_irqsave(&chsc_page_lock, flags);
- memset(chsc_page, 0, PAGE_SIZE);
- sda_area = chsc_page;
sda_area->request.length = 0x0400;
sda_area->request.code = 0x0031;
sda_area->operation_code = operation_code;
@@ -1119,10 +1101,25 @@ int chsc_enable_facility(int operation_code)
default:
ret = chsc_error_from_response(sda_area->response.code);
}
+out:
+ return ret;
+}
+
+int chsc_enable_facility(int operation_code)
+{
+ struct chsc_sda_area *sda_area;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&chsc_page_lock, flags);
+ memset(chsc_page, 0, PAGE_SIZE);
+ sda_area = chsc_page;
+
+ ret = __chsc_enable_facility(sda_area, operation_code);
if (ret != 0)
CIO_CRW_EVENT(2, "chsc: sda (oc=%x) failed (rc=%04x)\n",
operation_code, sda_area->response.code);
-out:
+
spin_unlock_irqrestore(&chsc_page_lock, flags);
return ret;
}
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h
index 76c9b50700b2..0de134c3a204 100644
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -115,6 +115,20 @@ struct chsc_scpd {
u8 data[PAGE_SIZE - 20];
} __attribute__ ((packed));
+struct chsc_sda_area {
+ struct chsc_header request;
+ u8 :4;
+ u8 format:4;
+ u8 :8;
+ u16 operation_code;
+ u32 :32;
+ u32 :32;
+ u32 operation_data_area[252];
+ struct chsc_header response;
+ u32 :4;
+ u32 format2:4;
+ u32 :24;
+} __packed __aligned(PAGE_SIZE);
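
Two conventions in the new chsc_sda_area are worth noting: unnamed bitfields (`u8 :4;`, `u32 :32;`) reserve hardware-defined pad bits without inventing names for them, and __aligned(PAGE_SIZE) lets callers such as cio_get_iplinfo() below keep the block in static storage. A minimal sketch of the same declaration style (hypothetical layout):

#include <linux/types.h>

struct demo_cmd_area {
	u16 length;
	u16 code;
	u8 :4;			/* reserved bits, never named or touched */
	u8 format:4;
	u16 operation_code;
	u32 :32;		/* reserved word */
} __packed __aligned(PAGE_SIZE);
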
extern int chsc_get_ssd_info(struct subchannel_id schid,
struct chsc_ssd_info *ssd);
@@ -122,6 +136,7 @@ extern int chsc_determine_css_characteristics(void);
extern int chsc_init(void);
extern void chsc_init_cleanup(void);
+int __chsc_enable_facility(struct chsc_sda_area *sda_area, int operation_code);
extern int chsc_enable_facility(int);
struct channel_subsystem;
extern int chsc_secm(struct channel_subsystem *, int);
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index b5620e818d6b..690b8547e828 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -925,18 +925,32 @@ void reipl_ccw_dev(struct ccw_dev_id *devid)
int __init cio_get_iplinfo(struct cio_iplinfo *iplinfo)
{
+ static struct chsc_sda_area sda_area __initdata;
struct subchannel_id schid;
struct schib schib;
schid = *(struct subchannel_id *)&S390_lowcore.subchannel_id;
if (!schid.one)
return -ENODEV;
+
+ if (schid.ssid) {
+ /*
+ * Firmware should have already enabled MSS but whoever started
+ * the kernel might have initiated a channel subsystem reset.
+ * Ensure that MSS is enabled.
+ */
+ memset(&sda_area, 0, sizeof(sda_area));
+ if (__chsc_enable_facility(&sda_area, CHSC_SDA_OC_MSS))
+ return -ENODEV;
+ }
if (stsch_err(schid, &schib))
return -ENODEV;
if (schib.pmcw.st != SUBCHANNEL_TYPE_IO)
return -ENODEV;
if (!schib.pmcw.dnv)
return -ENODEV;
+
+ iplinfo->ssid = schid.ssid;
iplinfo->devno = schib.pmcw.dev;
iplinfo->is_qdio = schib.pmcw.qf;
return 0;
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 2ee3053bdc12..489e703dc82d 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -702,17 +702,12 @@ css_generate_pgid(struct channel_subsystem *css, u32 tod_high)
css->global_pgid.pgid_high.ext_cssid.version = 0x80;
css->global_pgid.pgid_high.ext_cssid.cssid = css->cssid;
} else {
-#ifdef CONFIG_SMP
css->global_pgid.pgid_high.cpu_addr = stap();
-#else
- css->global_pgid.pgid_high.cpu_addr = 0;
-#endif
}
get_cpu_id(&cpu_id);
css->global_pgid.cpu_id = cpu_id.ident;
css->global_pgid.cpu_model = cpu_id.machine;
css->global_pgid.tod_high = tod_high;
-
}
static void
diff --git a/drivers/s390/crypto/Makefile b/drivers/s390/crypto/Makefile
index 57f710b3c8a4..b8ab18676e69 100644
--- a/drivers/s390/crypto/Makefile
+++ b/drivers/s390/crypto/Makefile
@@ -3,6 +3,9 @@
#
ap-objs := ap_bus.o
-obj-$(CONFIG_ZCRYPT) += ap.o zcrypt_api.o zcrypt_pcixcc.o
-obj-$(CONFIG_ZCRYPT) += zcrypt_cex2a.o zcrypt_cex4.o
+# zcrypt_api depends on ap
+obj-$(CONFIG_ZCRYPT) += ap.o zcrypt_api.o
+# msgtype* depend on zcrypt_api
obj-$(CONFIG_ZCRYPT) += zcrypt_msgtype6.o zcrypt_msgtype50.o
+# adapter drivers depend on ap, zcrypt_api and msgtype*
+obj-$(CONFIG_ZCRYPT) += zcrypt_pcixcc.o zcrypt_cex2a.o zcrypt_cex4.o
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 9cb3dfbcaddb..24ec282e15d8 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -74,6 +74,7 @@ static struct device *ap_root_device = NULL;
static struct ap_config_info *ap_configuration;
static DEFINE_SPINLOCK(ap_device_list_lock);
static LIST_HEAD(ap_device_list);
+static bool initialised;
/*
* Workqueue timer for bus rescan.
@@ -598,8 +599,10 @@ static enum ap_wait ap_sm_read(struct ap_device *ap_dev)
status = ap_sm_recv(ap_dev);
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
- if (ap_dev->queue_count > 0)
+ if (ap_dev->queue_count > 0) {
+ ap_dev->state = AP_STATE_WORKING;
return AP_WAIT_AGAIN;
+ }
ap_dev->state = AP_STATE_IDLE;
return AP_WAIT_NONE;
case AP_RESPONSE_NO_PENDING_REPLY:
@@ -1384,6 +1387,9 @@ int ap_driver_register(struct ap_driver *ap_drv, struct module *owner,
{
struct device_driver *drv = &ap_drv->driver;
+ if (!initialised)
+ return -ENODEV;
+
drv->bus = &ap_bus_type;
drv->probe = ap_device_probe;
drv->remove = ap_device_remove;
@@ -1808,6 +1814,7 @@ int __init ap_module_init(void)
goto out_pm;
queue_work(system_long_wq, &ap_scan_work);
+ initialised = true;
return 0;
@@ -1837,6 +1844,7 @@ void ap_module_exit(void)
{
int i;
+ initialised = false;
ap_reset_domain();
ap_poll_thread_stop();
del_timer_sync(&ap_config_timer);
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index a9603ebbc1f8..9f8fa42c062c 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -317,11 +317,9 @@ EXPORT_SYMBOL(zcrypt_device_unregister);
void zcrypt_msgtype_register(struct zcrypt_ops *zops)
{
- if (zops->owner) {
- spin_lock_bh(&zcrypt_ops_list_lock);
- list_add_tail(&zops->list, &zcrypt_ops_list);
- spin_unlock_bh(&zcrypt_ops_list_lock);
- }
+ spin_lock_bh(&zcrypt_ops_list_lock);
+ list_add_tail(&zops->list, &zcrypt_ops_list);
+ spin_unlock_bh(&zcrypt_ops_list_lock);
}
EXPORT_SYMBOL(zcrypt_msgtype_register);
@@ -342,7 +340,7 @@ struct zcrypt_ops *__ops_lookup(unsigned char *name, int variant)
spin_lock_bh(&zcrypt_ops_list_lock);
list_for_each_entry(zops, &zcrypt_ops_list, list) {
if ((zops->variant == variant) &&
- (!strncmp(zops->owner->name, name, MODULE_NAME_LEN))) {
+ (!strncmp(zops->name, name, sizeof(zops->name)))) {
found = 1;
break;
}
diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h
index 750876891931..38618f05ad92 100644
--- a/drivers/s390/crypto/zcrypt_api.h
+++ b/drivers/s390/crypto/zcrypt_api.h
@@ -96,6 +96,7 @@ struct zcrypt_ops {
struct list_head list; /* zcrypt ops list. */
struct module *owner;
int variant;
+ char name[128];
};
struct zcrypt_device {
diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c
index 71ceee9137a8..74edf2934e7c 100644
--- a/drivers/s390/crypto/zcrypt_msgtype50.c
+++ b/drivers/s390/crypto/zcrypt_msgtype50.c
@@ -513,6 +513,7 @@ static struct zcrypt_ops zcrypt_msgtype50_ops = {
.rsa_modexpo = zcrypt_cex2a_modexpo,
.rsa_modexpo_crt = zcrypt_cex2a_modexpo_crt,
.owner = THIS_MODULE,
+ .name = MSGTYPE50_NAME,
.variant = MSGTYPE50_VARIANT_DEFAULT,
};
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 74762214193b..9a2dd472c1cc 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -1119,6 +1119,7 @@ static long zcrypt_msgtype6_rng(struct zcrypt_device *zdev,
*/
static struct zcrypt_ops zcrypt_msgtype6_norng_ops = {
.owner = THIS_MODULE,
+ .name = MSGTYPE06_NAME,
.variant = MSGTYPE06_VARIANT_NORNG,
.rsa_modexpo = zcrypt_msgtype6_modexpo,
.rsa_modexpo_crt = zcrypt_msgtype6_modexpo_crt,
@@ -1127,6 +1128,7 @@ static struct zcrypt_ops zcrypt_msgtype6_norng_ops = {
static struct zcrypt_ops zcrypt_msgtype6_ops = {
.owner = THIS_MODULE,
+ .name = MSGTYPE06_NAME,
.variant = MSGTYPE06_VARIANT_DEFAULT,
.rsa_modexpo = zcrypt_msgtype6_modexpo,
.rsa_modexpo_crt = zcrypt_msgtype6_modexpo_crt,
@@ -1136,6 +1138,7 @@ static struct zcrypt_ops zcrypt_msgtype6_ops = {
static struct zcrypt_ops zcrypt_msgtype6_ep11_ops = {
.owner = THIS_MODULE,
+ .name = MSGTYPE06_NAME,
.variant = MSGTYPE06_VARIANT_EP11,
.rsa_modexpo = NULL,
.rsa_modexpo_crt = NULL,
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index b2a1a81e6fc8..1b831598df7c 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -984,6 +984,36 @@ static struct virtqueue *virtio_ccw_vq_by_ind(struct virtio_ccw_device *vcdev,
return vq;
}
+static void virtio_ccw_check_activity(struct virtio_ccw_device *vcdev,
+ __u32 activity)
+{
+ if (vcdev->curr_io & activity) {
+ switch (activity) {
+ case VIRTIO_CCW_DOING_READ_FEAT:
+ case VIRTIO_CCW_DOING_WRITE_FEAT:
+ case VIRTIO_CCW_DOING_READ_CONFIG:
+ case VIRTIO_CCW_DOING_WRITE_CONFIG:
+ case VIRTIO_CCW_DOING_WRITE_STATUS:
+ case VIRTIO_CCW_DOING_SET_VQ:
+ case VIRTIO_CCW_DOING_SET_IND:
+ case VIRTIO_CCW_DOING_SET_CONF_IND:
+ case VIRTIO_CCW_DOING_RESET:
+ case VIRTIO_CCW_DOING_READ_VQ_CONF:
+ case VIRTIO_CCW_DOING_SET_IND_ADAPTER:
+ case VIRTIO_CCW_DOING_SET_VIRTIO_REV:
+ vcdev->curr_io &= ~activity;
+ wake_up(&vcdev->wait_q);
+ break;
+ default:
+ /* don't know what to do... */
+ dev_warn(&vcdev->cdev->dev,
+ "Suspicious activity '%08x'\n", activity);
+ WARN_ON(1);
+ break;
+ }
+ }
+}
+
static void virtio_ccw_int_handler(struct ccw_device *cdev,
unsigned long intparm,
struct irb *irb)
@@ -995,6 +1025,12 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev,
if (!vcdev)
return;
+ if (IS_ERR(irb)) {
+ vcdev->err = PTR_ERR(irb);
+ virtio_ccw_check_activity(vcdev, activity);
+ /* Don't poke around indicators, something's wrong. */
+ return;
+ }
/* Check if it's a notification from the host. */
if ((intparm == 0) &&
(scsw_stctl(&irb->scsw) ==
@@ -1010,31 +1046,7 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev,
/* Map everything else to -EIO. */
vcdev->err = -EIO;
}
- if (vcdev->curr_io & activity) {
- switch (activity) {
- case VIRTIO_CCW_DOING_READ_FEAT:
- case VIRTIO_CCW_DOING_WRITE_FEAT:
- case VIRTIO_CCW_DOING_READ_CONFIG:
- case VIRTIO_CCW_DOING_WRITE_CONFIG:
- case VIRTIO_CCW_DOING_WRITE_STATUS:
- case VIRTIO_CCW_DOING_SET_VQ:
- case VIRTIO_CCW_DOING_SET_IND:
- case VIRTIO_CCW_DOING_SET_CONF_IND:
- case VIRTIO_CCW_DOING_RESET:
- case VIRTIO_CCW_DOING_READ_VQ_CONF:
- case VIRTIO_CCW_DOING_SET_IND_ADAPTER:
- case VIRTIO_CCW_DOING_SET_VIRTIO_REV:
- vcdev->curr_io &= ~activity;
- wake_up(&vcdev->wait_q);
- break;
- default:
- /* don't know what to do... */
- dev_warn(&cdev->dev, "Suspicious activity '%08x'\n",
- activity);
- WARN_ON(1);
- break;
- }
- }
+ virtio_ccw_check_activity(vcdev, activity);
for_each_set_bit(i, &vcdev->indicators,
sizeof(vcdev->indicators) * BITS_PER_BYTE) {
/* The bit clear must happen before the vring kick. */
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 5f692ae40749..64eed87d34a8 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -364,6 +364,7 @@ config SCSI_HPSA
tristate "HP Smart Array SCSI driver"
depends on PCI && SCSI
select CHECK_SIGNATURE
+ select SCSI_SAS_ATTRS
help
This driver supports HP Smart Array Controllers (circa 2009).
It is a SCSI alternative to the cciss driver, which is a block
@@ -499,6 +500,7 @@ config SCSI_ADVANSYS
tristate "AdvanSys SCSI support"
depends on SCSI
depends on ISA || EISA || PCI
+ depends on ISA_DMA_API || !ISA
help
This is a driver for all SCSI host adapters manufactured by
AdvanSys. It is documented in the kernel source in
diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
index 519f9a4b3dad..febbd83e2ecd 100644
--- a/drivers/scsi/advansys.c
+++ b/drivers/scsi/advansys.c
@@ -7803,7 +7803,7 @@ adv_build_req(struct asc_board *boardp, struct scsi_cmnd *scp,
return ASC_BUSY;
}
scsiqp->sense_addr = cpu_to_le32(sense_addr);
- scsiqp->sense_len = cpu_to_le32(SCSI_SENSE_BUFFERSIZE);
+ scsiqp->sense_len = SCSI_SENSE_BUFFERSIZE;
/* Build ADV_SCSI_REQ_Q */
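The advansys change matters on big-endian builds: the dropped conversion suggests sense_len is a single-byte field, so wrapping the value in cpu_to_le32() byte-swaps it before the truncating store zeroes it out. A small sketch of the hazard (swab32() stands in for what cpu_to_le32() does on a big-endian host; the field width is an assumption drawn from the patch):

    #include <stdio.h>
    #include <stdint.h>

    /* What cpu_to_le32() amounts to on a big-endian host. */
    static uint32_t swab32(uint32_t x)
    {
        return (x >> 24) | ((x >> 8) & 0xff00) |
               ((x << 8) & 0xff0000) | (x << 24);
    }

    int main(void)
    {
        uint32_t v = 96;        /* stand-in for SCSI_SENSE_BUFFERSIZE */
        uint8_t sense_len;

        sense_len = (uint8_t)v;          /* plain store: fine */
        printf("plain:   %u\n", sense_len);   /* 96 */

        sense_len = (uint8_t)swab32(v);  /* swapped first: value lost */
        printf("swapped: %u\n", sense_len);   /* 0 */
        return 0;
    }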
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 323982fd00c3..82ac1cd818ac 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -333,6 +333,17 @@ static void scsi_host_dev_release(struct device *dev)
kfree(queuedata);
}
+ if (shost->shost_state == SHOST_CREATED) {
+ /*
+ * Free the shost_dev device name here if scsi_host_alloc()
+ * and scsi_host_put() have been called but neither
+ * scsi_host_add() nor scsi_host_remove() has been called.
+	 * This avoids leaking the memory allocated for the
+	 * shost_dev name.
+ */
+ kfree(dev_name(&shost->shost_dev));
+ }
+
scsi_destroy_command_freelist(shost);
if (shost_use_blk_mq(shost)) {
if (shost->tag_set.tags)
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 6a8f95808ee0..a3860367b568 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -8671,7 +8671,7 @@ static void hpsa_disable_rld_caching(struct ctlr_info *h)
if ((rc != 0) || (c->err_info->CommandStatus != 0))
goto errout;
- if (*options && HPSA_DIAG_OPTS_DISABLE_RLD_CACHING)
+ if (*options & HPSA_DIAG_OPTS_DISABLE_RLD_CACHING)
goto out;
errout:
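The hpsa fix is one character: `&&` treats both operands as booleans, so any nonzero *options passed the test whether or not the caching bit was set, while `&` tests the actual bit. A tiny demonstration (the flag value is assumed for illustration):

    #include <stdio.h>

    #define HPSA_DIAG_OPTS_DISABLE_RLD_CACHING 0x4  /* assumed bit value */

    int main(void)
    {
        unsigned int options = 0x1;   /* some other bit set, caching bit clear */

        /* Logical AND: true for any nonzero options, regardless of the bit. */
        printf("&& : %d\n", options && HPSA_DIAG_OPTS_DISABLE_RLD_CACHING);   /* 1 */

        /* Bitwise AND: tests the bit that was actually meant. */
        printf("&  : %d\n", !!(options & HPSA_DIAG_OPTS_DISABLE_RLD_CACHING)); /* 0 */
        return 0;
    }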
diff --git a/drivers/scsi/mpt3sas/Kconfig b/drivers/scsi/mpt3sas/Kconfig
index 29061467cc17..b736dbc80485 100644
--- a/drivers/scsi/mpt3sas/Kconfig
+++ b/drivers/scsi/mpt3sas/Kconfig
@@ -71,3 +71,12 @@ config SCSI_MPT3SAS_MAX_SGE
	MAX_PHYS_SEGMENTS in most kernels. However, in SuSE kernels this
	can be 256. It may also be decreased down to 16. Decreasing this
	parameter will reduce memory requirements per controller instance.
+
+config SCSI_MPT2SAS
+ tristate "Legacy MPT2SAS config option"
+ default n
+ select SCSI_MPT3SAS
+ depends on PCI && SCSI
+ ---help---
+	  Dummy config option for backwards compatibility: configure the MPT3SAS
+ driver instead.
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index d95206b7e116..9ab77b06434d 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -3905,8 +3905,7 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
* We do not expose raid functionality to upper layer for warpdrive.
*/
if (!ioc->is_warpdrive && !scsih_is_raid(&scmd->device->sdev_gendev)
- && (sas_device_priv_data->flags & MPT_DEVICE_TLR_ON) &&
- scmd->cmd_len != 32)
+ && sas_is_tlr_enabled(scmd->device) && scmd->cmd_len != 32)
mpi_control |= MPI2_SCSIIO_CONTROL_TLR_ON;
smid = mpt3sas_base_get_smid_scsiio(ioc, ioc->scsi_io_cb_idx, scmd);
diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c
index 90fdf0e859e3..675e7fab0796 100644
--- a/drivers/scsi/mvsas/mv_init.c
+++ b/drivers/scsi/mvsas/mv_init.c
@@ -758,7 +758,7 @@ mvs_store_interrupt_coalescing(struct device *cdev,
struct device_attribute *attr,
const char *buffer, size_t size)
{
- int val = 0;
+ unsigned int val = 0;
struct mvs_info *mvi = NULL;
struct Scsi_Host *shost = class_to_shost(cdev);
struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
@@ -766,7 +766,7 @@ mvs_store_interrupt_coalescing(struct device *cdev,
if (buffer == NULL)
return size;
- if (sscanf(buffer, "%d", &val) != 1)
+ if (sscanf(buffer, "%u", &val) != 1)
return -EINVAL;
if (val >= 0x10000) {
diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c
index eb0cc5475c45..b6b4cfdd7620 100644
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c
@@ -433,7 +433,7 @@ qla82xx_pci_get_crb_addr_2M(struct qla_hw_data *ha, ulong off_in,
if (off_in < QLA82XX_PCI_CRBSPACE)
return -1;
- *off_out = (void __iomem *)(off_in - QLA82XX_PCI_CRBSPACE);
+ off_in -= QLA82XX_PCI_CRBSPACE;
/* Try direct map */
m = &crb_128M_2M_map[CRB_BLK(off_in)].sub_block[CRB_SUBBLK(off_in)];
@@ -443,6 +443,7 @@ qla82xx_pci_get_crb_addr_2M(struct qla_hw_data *ha, ulong off_in,
return 0;
}
/* Not in direct map, use crb window */
+ *off_out = (void __iomem *)off_in;
return 1;
}
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 3ba2e9564b9a..81af294f15a7 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -902,7 +902,7 @@ static ssize_t tcm_qla2xxx_tpg_fabric_prot_type_show(struct config_item *item,
return sprintf(page, "%d\n", tpg->tpg_attrib.fabric_prot_type);
}
-CONFIGFS_ATTR_WO(tcm_qla2xxx_tpg_, enable);
+CONFIGFS_ATTR(tcm_qla2xxx_tpg_, enable);
CONFIGFS_ATTR_RO(tcm_qla2xxx_tpg_, dynamic_sessions);
CONFIGFS_ATTR(tcm_qla2xxx_tpg_, fabric_prot_type);
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index dfcc45bb03b1..d09d60293c27 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -465,8 +465,9 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = {
0} },
{0, 0, 0, F_INV_OP | FF_RESPOND, NULL, NULL, /* MAINT OUT */
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
- {0, 0, 0, F_INV_OP | FF_RESPOND, NULL, NULL, /* VERIFY */
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
+ {0, 0x2f, 0, F_D_OUT_MAYBE | FF_DIRECT_IO, NULL, NULL, /* VERIFY(10) */
+ {10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7,
+ 0, 0, 0, 0, 0, 0} },
{1, 0x7f, 0x9, F_SA_HIGH | F_D_IN | FF_DIRECT_IO, resp_read_dt0,
vl_iarr, {32, 0xc7, 0, 0, 0, 0, 0x1f, 0x18, 0x0, 0x9, 0xfe, 0,
0xff, 0xff, 0xff, 0xff} },/* VARIABLE LENGTH, READ(32) */
@@ -477,8 +478,8 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = {
{10, 0x13, 0xff, 0xff, 0, 0, 0, 0xff, 0xff, 0xc7, 0, 0, 0, 0, 0,
0} },
/* 20 */
- {0, 0, 0, F_INV_OP | FF_RESPOND, NULL, NULL, /* ALLOW REMOVAL */
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
+ {0, 0x1e, 0, 0, NULL, NULL, /* ALLOW REMOVAL */
+ {6, 0, 0, 0, 0x3, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
{0, 0x1, 0, 0, resp_start_stop, NULL, /* REWIND ?? */
{6, 0x1, 0, 0, 0, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
{0, 0, 0, F_INV_OP | FF_RESPOND, NULL, NULL, /* ATA_PT */
diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c
index e4b799837948..459abe1dcc87 100644
--- a/drivers/scsi/scsi_pm.c
+++ b/drivers/scsi/scsi_pm.c
@@ -219,13 +219,13 @@ static int sdev_runtime_suspend(struct device *dev)
struct scsi_device *sdev = to_scsi_device(dev);
int err = 0;
- if (pm && pm->runtime_suspend) {
- err = blk_pre_runtime_suspend(sdev->request_queue);
- if (err)
- return err;
+ err = blk_pre_runtime_suspend(sdev->request_queue);
+ if (err)
+ return err;
+ if (pm && pm->runtime_suspend)
err = pm->runtime_suspend(dev);
- blk_post_runtime_suspend(sdev->request_queue, err);
- }
+ blk_post_runtime_suspend(sdev->request_queue, err);
+
return err;
}
@@ -248,11 +248,11 @@ static int sdev_runtime_resume(struct device *dev)
const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
int err = 0;
- if (pm && pm->runtime_resume) {
- blk_pre_runtime_resume(sdev->request_queue);
+ blk_pre_runtime_resume(sdev->request_queue);
+ if (pm && pm->runtime_resume)
err = pm->runtime_resume(dev);
- blk_post_runtime_resume(sdev->request_queue, err);
- }
+ blk_post_runtime_resume(sdev->request_queue, err);
+
return err;
}
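Structurally, the scsi_pm change moves blk_pre_runtime_suspend()/blk_post_runtime_suspend() (and the resume pair) outside the driver-callback guard, so the block layer's runtime-PM accounting runs even when the driver supplies no callback. A compilable sketch of that shape, with stub functions standing in for the block-layer hooks:

    #include <stdio.h>

    struct dev_pm_ops { int (*runtime_suspend)(void); };

    static int blk_pre(void)      { puts("blk_pre_runtime_suspend");  return 0; }
    static void blk_post(int err) { printf("blk_post_runtime_suspend(err=%d)\n", err); }

    /* The block-layer pre/post hooks now pair unconditionally; only the
     * optional driver callback is guarded by the NULL checks. */
    static int runtime_suspend(const struct dev_pm_ops *pm)
    {
        int err = blk_pre();

        if (err)
            return err;
        if (pm && pm->runtime_suspend)
            err = pm->runtime_suspend();
        blk_post(err);
        return err;
    }

    int main(void)
    {
        runtime_suspend(NULL);   /* no driver callback: hooks still pair up */
        return 0;
    }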
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 83245391e956..054923e3393c 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -701,9 +701,12 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
* strings.
*/
if (sdev->inquiry_len < 36) {
- sdev_printk(KERN_INFO, sdev,
- "scsi scan: INQUIRY result too short (%d),"
- " using 36\n", sdev->inquiry_len);
+ if (!sdev->host->short_inquiry) {
+ shost_printk(KERN_INFO, sdev->host,
+ "scsi scan: INQUIRY result too short (%d),"
+ " using 36\n", sdev->inquiry_len);
+ sdev->host->short_inquiry = 1;
+ }
sdev->inquiry_len = 36;
}
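The scan fix demotes a per-LUN warning to a once-per-host one by latching a flag on the Scsi_Host. The pattern in isolation (the field name is borrowed from the patch; everything else is illustrative):

    #include <stdio.h>

    struct host {
        int short_inquiry;    /* latched after the first warning */
    };

    /* Warn once per host, not once per scanned LUN. */
    static void note_short_inquiry(struct host *h, int len)
    {
        if (!h->short_inquiry) {
            fprintf(stderr, "INQUIRY result too short (%d), using 36\n", len);
            h->short_inquiry = 1;
        }
    }

    int main(void)
    {
        struct host h = { 0 };

        note_short_inquiry(&h, 5);   /* warns */
        note_short_inquiry(&h, 5);   /* silent */
        return 0;
    }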
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 8d2312239ae0..21930c9ac9cd 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1102,6 +1102,14 @@ void __scsi_remove_device(struct scsi_device *sdev)
{
struct device *dev = &sdev->sdev_gendev;
+ /*
+	 * This cleanup path is not reentrant. While it is impossible to
+	 * obtain a new reference with scsi_device_get(), someone can still
+	 * hold a previously acquired one.
+ */
+ if (sdev->sdev_state == SDEV_DEL)
+ return;
+
if (sdev->is_visible) {
if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
return;
@@ -1110,7 +1118,9 @@ void __scsi_remove_device(struct scsi_device *sdev)
device_unregister(&sdev->sdev_dev);
transport_remove_device(dev);
scsi_dh_remove_device(sdev);
- }
+ device_del(dev);
+ } else
+ put_device(&sdev->sdev_dev);
/*
* Stop accepting new requests and wait until all queuecommand() and
@@ -1121,16 +1131,6 @@ void __scsi_remove_device(struct scsi_device *sdev)
blk_cleanup_queue(sdev->request_queue);
cancel_work_sync(&sdev->requeue_work);
- /*
- * Remove the device after blk_cleanup_queue() has been called such
- * a possible bdi_register() call with the same name occurs after
- * blk_cleanup_queue() has called bdi_destroy().
- */
- if (sdev->is_visible)
- device_del(dev);
- else
- put_device(&sdev->sdev_dev);
-
if (sdev->host->hostt->slave_destroy)
sdev->host->hostt->slave_destroy(sdev);
transport_destroy_device(dev);
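The added SDEV_DEL check makes __scsi_remove_device() safe to call twice: the first caller latches the terminal state, so a later caller holding an older reference becomes a no-op. A minimal sketch of that idempotent-teardown shape (types simplified):

    #include <stdio.h>

    enum sdev_state { SDEV_RUNNING, SDEV_DEL };

    struct sdev {
        enum sdev_state state;
    };

    /* Latch the terminal state on first entry so a caller holding an
     * older reference cannot run the teardown a second time. */
    static void remove_device(struct sdev *s)
    {
        if (s->state == SDEV_DEL)
            return;
        s->state = SDEV_DEL;
        puts("tearing down once");
    }

    int main(void)
    {
        struct sdev s = { SDEV_RUNNING };

        remove_device(&s);   /* tears down */
        remove_device(&s);   /* no-op */
        return 0;
    }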
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 54519804c46a..4e08d1cd704d 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -638,11 +638,24 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
unsigned int max_blocks = 0;
q->limits.discard_zeroes_data = 0;
- q->limits.discard_alignment = sdkp->unmap_alignment *
- logical_block_size;
- q->limits.discard_granularity =
- max(sdkp->physical_block_size,
- sdkp->unmap_granularity * logical_block_size);
+
+ /*
+ * When LBPRZ is reported, discard alignment and granularity
+ * must be fixed to the logical block size. Otherwise the block
+ * layer will drop misaligned portions of the request which can
+ * lead to data corruption. If LBPRZ is not set, we honor the
+ * device preference.
+ */
+ if (sdkp->lbprz) {
+ q->limits.discard_alignment = 0;
+ q->limits.discard_granularity = 1;
+ } else {
+ q->limits.discard_alignment = sdkp->unmap_alignment *
+ logical_block_size;
+ q->limits.discard_granularity =
+ max(sdkp->physical_block_size,
+ sdkp->unmap_granularity * logical_block_size);
+ }
sdkp->provisioning_mode = mode;
@@ -2321,11 +2334,8 @@ got_data:
}
}
- if (sdkp->capacity > 0xffffffff) {
+ if (sdkp->capacity > 0xffffffff)
sdp->use_16_for_rw = 1;
- sdkp->max_xfer_blocks = SD_MAX_XFER_BLOCKS;
- } else
- sdkp->max_xfer_blocks = SD_DEF_XFER_BLOCKS;
/* Rescale capacity to 512-byte units */
if (sector_size == 4096)
@@ -2642,7 +2652,6 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
{
unsigned int sector_sz = sdkp->device->sector_size;
const int vpd_len = 64;
- u32 max_xfer_length;
unsigned char *buffer = kmalloc(vpd_len, GFP_KERNEL);
if (!buffer ||
@@ -2650,14 +2659,11 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
scsi_get_vpd_page(sdkp->device, 0xb0, buffer, vpd_len))
goto out;
- max_xfer_length = get_unaligned_be32(&buffer[8]);
- if (max_xfer_length)
- sdkp->max_xfer_blocks = max_xfer_length;
-
blk_queue_io_min(sdkp->disk->queue,
get_unaligned_be16(&buffer[6]) * sector_sz);
- blk_queue_io_opt(sdkp->disk->queue,
- get_unaligned_be32(&buffer[12]) * sector_sz);
+
+ sdkp->max_xfer_blocks = get_unaligned_be32(&buffer[8]);
+ sdkp->opt_xfer_blocks = get_unaligned_be32(&buffer[12]);
if (buffer[3] == 0x3c) {
unsigned int lba_count, desc_count;
@@ -2806,6 +2812,11 @@ static int sd_try_extended_inquiry(struct scsi_device *sdp)
return 0;
}
+static inline u32 logical_to_sectors(struct scsi_device *sdev, u32 blocks)
+{
+ return blocks << (ilog2(sdev->sector_size) - 9);
+}
+
/**
* sd_revalidate_disk - called the first time a new disk is seen,
* performs disk spin up, read_capacity, etc.
@@ -2815,8 +2826,9 @@ static int sd_revalidate_disk(struct gendisk *disk)
{
struct scsi_disk *sdkp = scsi_disk(disk);
struct scsi_device *sdp = sdkp->device;
+ struct request_queue *q = sdkp->disk->queue;
unsigned char *buffer;
- unsigned int max_xfer;
+ unsigned int dev_max, rw_max;
SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp,
"sd_revalidate_disk\n"));
@@ -2864,11 +2876,29 @@ static int sd_revalidate_disk(struct gendisk *disk)
*/
sd_set_flush_flag(sdkp);
- max_xfer = sdkp->max_xfer_blocks;
- max_xfer <<= ilog2(sdp->sector_size) - 9;
+ /* Initial block count limit based on CDB TRANSFER LENGTH field size. */
+ dev_max = sdp->use_16_for_rw ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS;
+
+ /* Some devices report a maximum block count for READ/WRITE requests. */
+ dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks);
+ q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max);
+
+ /*
+ * Use the device's preferred I/O size for reads and writes
+ * unless the reported value is unreasonably small, large, or
+ * garbage.
+ */
+ if (sdkp->opt_xfer_blocks &&
+ sdkp->opt_xfer_blocks <= dev_max &&
+ sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
+ sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_CACHE_SIZE)
+ rw_max = q->limits.io_opt =
+ logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
+ else
+ rw_max = BLK_DEF_MAX_SECTORS;
- sdkp->disk->queue->limits.max_sectors =
- min_not_zero(queue_max_hw_sectors(sdkp->disk->queue), max_xfer);
+ /* Combine with controller limits */
+ q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q));
set_capacity(disk, sdkp->capacity);
sd_config_write_same(sdkp);
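The sd.c rework picks the request-size limit in two steps: clamp the device's reported maximum with min_not_zero() (so an unreported zero cannot collapse the limit), then trust the reported optimal size only when it is nonzero, within the CDB limit, and at least a page. A userspace sketch under assumed constants (BLK_DEF_MAX_SECTORS here is a guess at the era's default, and this logical_to_sectors() multiplies where the kernel shifts by ilog2(sector_size) - 9):

    #include <stdio.h>

    #define SD_DEF_XFER_BLOCKS  0xffffU
    #define SD_MAX_XFER_BLOCKS  0xffffffffU
    #define PAGE_SIZE           4096U
    #define BLK_DEF_MAX_SECTORS 2560U     /* assumed block-layer default */

    #define min_not_zero(a, b) \
        ((a) == 0 ? (b) : ((b) == 0 ? (a) : ((a) < (b) ? (a) : (b))))

    /* Simplified: assumes sector_size is a power-of-two multiple of 512. */
    static unsigned int logical_to_sectors(unsigned int sector_size,
                                           unsigned int blocks)
    {
        return blocks * (sector_size / 512);
    }

    static unsigned int pick_rw_max(unsigned int sector_size, int use_16,
                                    unsigned int max_xfer, unsigned int opt_xfer)
    {
        unsigned int dev_max = use_16 ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS;

        /* A zero (unreported) device maximum must not clamp dev_max to 0. */
        dev_max = min_not_zero(dev_max, max_xfer);

        /* Trust the optimal size only when it is plausibly sane. */
        if (opt_xfer && opt_xfer <= dev_max &&
            opt_xfer <= SD_DEF_XFER_BLOCKS &&
            (unsigned long long)opt_xfer * sector_size >= PAGE_SIZE)
            return logical_to_sectors(sector_size, opt_xfer);
        return BLK_DEF_MAX_SECTORS;
    }

    int main(void)
    {
        printf("%u\n", pick_rw_max(512, 0, 0, 1024));  /* 1024: device hint used */
        printf("%u\n", pick_rw_max(512, 0, 0, 1));     /* 2560: hint too small */
        return 0;
    }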
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 63ba5ca7f9a1..5f2a84aff29f 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -67,6 +67,7 @@ struct scsi_disk {
atomic_t openers;
sector_t capacity; /* size in 512-byte sectors */
u32 max_xfer_blocks;
+ u32 opt_xfer_blocks;
u32 max_ws_blocks;
u32 max_unmap_blocks;
u32 unmap_granularity;
diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c
index dcb0d76d7312..044d06410d4c 100644
--- a/drivers/scsi/ses.c
+++ b/drivers/scsi/ses.c
@@ -84,6 +84,7 @@ static void init_device_slot_control(unsigned char *dest_desc,
static int ses_recv_diag(struct scsi_device *sdev, int page_code,
void *buf, int bufflen)
{
+ int ret;
unsigned char cmd[] = {
RECEIVE_DIAGNOSTIC,
1, /* Set PCV bit */
@@ -92,9 +93,26 @@ static int ses_recv_diag(struct scsi_device *sdev, int page_code,
bufflen & 0xff,
0
};
+ unsigned char recv_page_code;
- return scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, bufflen,
+ ret = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, bufflen,
NULL, SES_TIMEOUT, SES_RETRIES, NULL);
+ if (unlikely(!ret))
+ return ret;
+
+ recv_page_code = ((unsigned char *)buf)[0];
+
+ if (likely(recv_page_code == page_code))
+ return ret;
+
+	/* Successful diagnostic but wrong page code. This happens with some
+	 * USB devices; print a message and pretend there was an error. */
+
+ sdev_printk(KERN_ERR, sdev,
+ "Wrong diagnostic page; asked for %d got %u\n",
+ page_code, recv_page_code);
+
+ return -EINVAL;
}
static int ses_send_diag(struct scsi_device *sdev, int page_code,
@@ -541,7 +559,15 @@ static void ses_enclosure_data_process(struct enclosure_device *edev,
if (desc_ptr)
desc_ptr += len;
- if (addl_desc_ptr)
+ if (addl_desc_ptr &&
+ /* only find additional descriptions for specific devices */
+ (type_ptr[0] == ENCLOSURE_COMPONENT_DEVICE ||
+ type_ptr[0] == ENCLOSURE_COMPONENT_ARRAY_DEVICE ||
+ type_ptr[0] == ENCLOSURE_COMPONENT_SAS_EXPANDER ||
+ /* these elements are optional */
+ type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_TARGET_PORT ||
+ type_ptr[0] == ENCLOSURE_COMPONENT_SCSI_INITIATOR_PORT ||
+ type_ptr[0] == ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS))
addl_desc_ptr += addl_desc_ptr[1] + 2;
}
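The ses change validates that the device answered with the diagnostic page that was actually requested; per SPC, byte 0 of the returned payload is the page code, and some USB enclosures get it wrong. The check in isolation:

    #include <stdio.h>

    /* Byte 0 of a RECEIVE DIAGNOSTIC RESULTS payload is the page code (SPC). */
    static int check_page_code(const unsigned char *buf, int wanted)
    {
        if (buf[0] == wanted)
            return 0;
        fprintf(stderr, "Wrong diagnostic page; asked for %d got %u\n",
                wanted, buf[0]);
        return -1;
    }

    int main(void)
    {
        unsigned char good[] = { 0x07 }, bad[] = { 0x01 };

        check_page_code(good, 0x07);   /* accepted */
        check_page_code(bad, 0x07);    /* rejected with a message */
        return 0;
    }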
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index e0a1e52a04e7..2e522951b619 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -4083,6 +4083,7 @@ static int create_one_cdev(struct scsi_tape *tape, int mode, int rew)
}
cdev->owner = THIS_MODULE;
cdev->ops = &st_fops;
+ STm->cdevs[rew] = cdev;
error = cdev_add(cdev, cdev_devno, 1);
if (error) {
@@ -4091,7 +4092,6 @@ static int create_one_cdev(struct scsi_tape *tape, int mode, int rew)
pr_err("st%d: Device not attached.\n", dev_num);
goto out_free;
}
- STm->cdevs[rew] = cdev;
i = mode << (4 - ST_NBR_MODE_BITS);
snprintf(name, 10, "%s%s%s", rew ? "n" : "",
@@ -4110,8 +4110,9 @@ static int create_one_cdev(struct scsi_tape *tape, int mode, int rew)
return 0;
out_free:
cdev_del(STm->cdevs[rew]);
- STm->cdevs[rew] = NULL;
out:
+ STm->cdevs[rew] = NULL;
+ STm->devs[rew] = NULL;
return error;
}
diff --git a/drivers/sh/pm_runtime.c b/drivers/sh/pm_runtime.c
index 25abd4eb7d10..91a003011acf 100644
--- a/drivers/sh/pm_runtime.c
+++ b/drivers/sh/pm_runtime.c
@@ -34,7 +34,7 @@ static struct pm_clk_notifier_block platform_bus_notifier = {
static int __init sh_pm_runtime_init(void)
{
- if (IS_ENABLED(CONFIG_ARCH_SHMOBILE_MULTI)) {
+ if (IS_ENABLED(CONFIG_ARCH_SHMOBILE)) {
if (!of_find_compatible_node(NULL, NULL,
"renesas,cpg-mstp-clocks"))
return 0;
diff --git a/drivers/soc/mediatek/Kconfig b/drivers/soc/mediatek/Kconfig
index 9d5068248aa0..0a4ea809a61b 100644
--- a/drivers/soc/mediatek/Kconfig
+++ b/drivers/soc/mediatek/Kconfig
@@ -23,6 +23,7 @@ config MTK_PMIC_WRAP
config MTK_SCPSYS
bool "MediaTek SCPSYS Support"
depends on ARCH_MEDIATEK || COMPILE_TEST
+ default ARM64 && ARCH_MEDIATEK
select REGMAP
select MTK_INFRACFG
select PM_GENERIC_DOMAINS if PM
diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c
index f3a0b6a4b54e..8c03a80b482d 100644
--- a/drivers/soc/ti/knav_qmss_queue.c
+++ b/drivers/soc/ti/knav_qmss_queue.c
@@ -1179,7 +1179,7 @@ static int knav_queue_setup_link_ram(struct knav_device *kdev)
block++;
if (!block->size)
- return 0;
+ continue;
dev_dbg(kdev->dev, "linkram1: phys:%x, virt:%p, size:%x\n",
block->phys, block->virt, block->size);
@@ -1519,9 +1519,9 @@ static int knav_queue_load_pdsp(struct knav_device *kdev,
for (i = 0; i < ARRAY_SIZE(knav_acc_firmwares); i++) {
if (knav_acc_firmwares[i]) {
- ret = request_firmware(&fw,
- knav_acc_firmwares[i],
- kdev->dev);
+ ret = request_firmware_direct(&fw,
+ knav_acc_firmwares[i],
+ kdev->dev);
if (!ret) {
found = true;
break;
diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c
index 06858e04ec59..bf9a610e5b89 100644
--- a/drivers/spi/spi-bcm63xx.c
+++ b/drivers/spi/spi-bcm63xx.c
@@ -562,8 +562,8 @@ static int bcm63xx_spi_probe(struct platform_device *pdev)
goto out_clk_disable;
}
- dev_info(dev, "at 0x%08x (irq %d, FIFOs size %d)\n",
- r->start, irq, bs->fifo_size);
+ dev_info(dev, "at %pr (irq %d, FIFOs size %d)\n",
+ r, irq, bs->fifo_size);
return 0;
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 59a11437db70..39412c9097c6 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -167,7 +167,7 @@ static inline int is_double_byte_mode(struct fsl_dspi *dspi)
{
unsigned int val;
- regmap_read(dspi->regmap, SPI_CTAR(dspi->cs), &val);
+ regmap_read(dspi->regmap, SPI_CTAR(0), &val);
return ((val & SPI_FRAME_BITS_MASK) == SPI_FRAME_BITS(8)) ? 0 : 1;
}
@@ -257,7 +257,7 @@ static u32 dspi_data_to_pushr(struct fsl_dspi *dspi, int tx_word)
return SPI_PUSHR_TXDATA(d16) |
SPI_PUSHR_PCS(dspi->cs) |
- SPI_PUSHR_CTAS(dspi->cs) |
+ SPI_PUSHR_CTAS(0) |
SPI_PUSHR_CONT;
}
@@ -290,7 +290,7 @@ static int dspi_eoq_write(struct fsl_dspi *dspi)
*/
if (tx_word && (dspi->len == 1)) {
dspi->dataflags |= TRAN_STATE_WORD_ODD_NUM;
- regmap_update_bits(dspi->regmap, SPI_CTAR(dspi->cs),
+ regmap_update_bits(dspi->regmap, SPI_CTAR(0),
SPI_FRAME_BITS_MASK, SPI_FRAME_BITS(8));
tx_word = 0;
}
@@ -339,7 +339,7 @@ static int dspi_tcfq_write(struct fsl_dspi *dspi)
if (tx_word && (dspi->len == 1)) {
dspi->dataflags |= TRAN_STATE_WORD_ODD_NUM;
- regmap_update_bits(dspi->regmap, SPI_CTAR(dspi->cs),
+ regmap_update_bits(dspi->regmap, SPI_CTAR(0),
SPI_FRAME_BITS_MASK, SPI_FRAME_BITS(8));
tx_word = 0;
}
@@ -407,7 +407,7 @@ static int dspi_transfer_one_message(struct spi_master *master,
regmap_update_bits(dspi->regmap, SPI_MCR,
SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF,
SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF);
- regmap_write(dspi->regmap, SPI_CTAR(dspi->cs),
+ regmap_write(dspi->regmap, SPI_CTAR(0),
dspi->cur_chip->ctar_val);
trans_mode = dspi->devtype_data->trans_mode;
@@ -566,7 +566,7 @@ static irqreturn_t dspi_interrupt(int irq, void *dev_id)
if (!dspi->len) {
if (dspi->dataflags & TRAN_STATE_WORD_ODD_NUM) {
regmap_update_bits(dspi->regmap,
- SPI_CTAR(dspi->cs),
+ SPI_CTAR(0),
SPI_FRAME_BITS_MASK,
SPI_FRAME_BITS(16));
dspi->dataflags &= ~TRAN_STATE_WORD_ODD_NUM;
diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index 563954a61424..7840067062a8 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -410,7 +410,7 @@ static int mtk_spi_setup(struct spi_device *spi)
if (!spi->controller_data)
spi->controller_data = (void *)&mtk_default_chip_info;
- if (mdata->dev_comp->need_pad_sel)
+ if (mdata->dev_comp->need_pad_sel && gpio_is_valid(spi->cs_gpio))
gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
return 0;
@@ -632,13 +632,23 @@ static int mtk_spi_probe(struct platform_device *pdev)
goto err_put_master;
}
- for (i = 0; i < master->num_chipselect; i++) {
- ret = devm_gpio_request(&pdev->dev, master->cs_gpios[i],
- dev_name(&pdev->dev));
- if (ret) {
- dev_err(&pdev->dev,
- "can't get CS GPIO %i\n", i);
- goto err_put_master;
+ if (!master->cs_gpios && master->num_chipselect > 1) {
+ dev_err(&pdev->dev,
+ "cs_gpios not specified and num_chipselect > 1\n");
+ ret = -EINVAL;
+ goto err_put_master;
+ }
+
+ if (master->cs_gpios) {
+ for (i = 0; i < master->num_chipselect; i++) {
+ ret = devm_gpio_request(&pdev->dev,
+ master->cs_gpios[i],
+ dev_name(&pdev->dev));
+ if (ret) {
+ dev_err(&pdev->dev,
+ "can't get CS GPIO %i\n", i);
+ goto err_put_master;
+ }
}
}
}
diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index 94af80676684..5e5fd77e2711 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -1171,19 +1171,31 @@ err_no_rxchan:
static int pl022_dma_autoprobe(struct pl022 *pl022)
{
struct device *dev = &pl022->adev->dev;
+ struct dma_chan *chan;
+ int err;
/* automatically configure DMA channels from platform, normally using DT */
- pl022->dma_rx_channel = dma_request_slave_channel(dev, "rx");
- if (!pl022->dma_rx_channel)
+ chan = dma_request_slave_channel_reason(dev, "rx");
+ if (IS_ERR(chan)) {
+ err = PTR_ERR(chan);
goto err_no_rxchan;
+ }
+
+ pl022->dma_rx_channel = chan;
- pl022->dma_tx_channel = dma_request_slave_channel(dev, "tx");
- if (!pl022->dma_tx_channel)
+ chan = dma_request_slave_channel_reason(dev, "tx");
+ if (IS_ERR(chan)) {
+ err = PTR_ERR(chan);
goto err_no_txchan;
+ }
+
+ pl022->dma_tx_channel = chan;
pl022->dummypage = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!pl022->dummypage)
+ if (!pl022->dummypage) {
+ err = -ENOMEM;
goto err_no_dummypage;
+ }
return 0;
@@ -1194,7 +1206,7 @@ err_no_txchan:
dma_release_channel(pl022->dma_rx_channel);
pl022->dma_rx_channel = NULL;
err_no_rxchan:
- return -ENODEV;
+ return err;
}
static void terminate_dma(struct pl022 *pl022)
@@ -2236,6 +2248,10 @@ static int pl022_probe(struct amba_device *adev, const struct amba_id *id)
/* Get DMA channels, try autoconfiguration first */
status = pl022_dma_autoprobe(pl022);
+ if (status == -EPROBE_DEFER) {
+ dev_dbg(dev, "deferring probe to get DMA channel\n");
+ goto err_no_irq;
+ }
/* If that failed, use channels from platform_info */
if (status == 0)
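The pl022 conversion matters because dma_request_slave_channel() returns NULL on every failure, while the _reason variant returns an ERR_PTR() carrying the real errno, letting -EPROBE_DEFER reach the probe path instead of being flattened to -ENODEV. A userspace model of the ERR_PTR convention (EPROBE_DEFER's value is the kernel-internal one; request_chan() is a made-up stand-in):

    #include <stdio.h>

    #define EPROBE_DEFER 517          /* kernel-internal errno value */
    #define MAX_ERRNO    4095
    #define ERR_PTR(err) ((void *)(long)(err))
    #define PTR_ERR(ptr) ((long)(ptr))
    #define IS_ERR(ptr)  ((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

    /* Stand-in for the _reason-style request: a channel pointer on
     * success, an ERR_PTR() carrying the actual errno on failure. */
    static void *request_chan(int err)
    {
        static int dummy_chan;

        return err ? ERR_PTR(-err) : (void *)&dummy_chan;
    }

    int main(void)
    {
        void *chan = request_chan(EPROBE_DEFER);

        if (IS_ERR(chan))
            printf("propagating %ld instead of -ENODEV\n", PTR_ERR(chan));
        return 0;
    }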
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index e2415be209d5..dee1cb87d24f 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -376,6 +376,7 @@ static void spi_drv_shutdown(struct device *dev)
/**
* __spi_register_driver - register a SPI driver
+ * @owner: owner module of the driver to register
* @sdrv: the driver to register
* Context: can sleep
*
@@ -1704,7 +1705,7 @@ struct spi_master *spi_alloc_master(struct device *dev, unsigned size)
master->bus_num = -1;
master->num_chipselect = 1;
master->dev.class = &spi_master_class;
- master->dev.parent = get_device(dev);
+ master->dev.parent = dev;
spi_master_set_devdata(master, &master[1]);
return master;
@@ -2130,6 +2131,7 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
* Set transfer tx_nbits and rx_nbits as single transfer default
* (SPI_NBITS_SINGLE) if it is not set for this transfer.
*/
+ message->frame_length = 0;
list_for_each_entry(xfer, &message->transfers, transfer_list) {
message->frame_length += xfer->len;
if (!xfer->bits_per_word)
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 91a0fcd72423..d0e7dfc647cf 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -651,11 +651,11 @@ static int spidev_release(struct inode *inode, struct file *filp)
kfree(spidev->rx_buffer);
spidev->rx_buffer = NULL;
+ spin_lock_irq(&spidev->spi_lock);
if (spidev->spi)
spidev->speed_hz = spidev->spi->max_speed_hz;
/* ... after we unbound from the underlying device? */
- spin_lock_irq(&spidev->spi_lock);
dofree = (spidev->spi == NULL);
spin_unlock_irq(&spidev->spi_lock);
diff --git a/drivers/staging/android/ion/ion_chunk_heap.c b/drivers/staging/android/ion/ion_chunk_heap.c
index 195c41d7bd53..0813163f962f 100644
--- a/drivers/staging/android/ion/ion_chunk_heap.c
+++ b/drivers/staging/android/ion/ion_chunk_heap.c
@@ -81,7 +81,7 @@ static int ion_chunk_heap_allocate(struct ion_heap *heap,
err:
sg = table->sgl;
for (i -= 1; i >= 0; i--) {
- gen_pool_free(chunk_heap->pool, sg_phys(sg) & PAGE_MASK,
+ gen_pool_free(chunk_heap->pool, page_to_phys(sg_page(sg)),
sg->length);
sg = sg_next(sg);
}
@@ -109,7 +109,7 @@ static void ion_chunk_heap_free(struct ion_buffer *buffer)
DMA_BIDIRECTIONAL);
for_each_sg(table->sgl, sg, table->nents, i) {
- gen_pool_free(chunk_heap->pool, sg_phys(sg) & PAGE_MASK,
+ gen_pool_free(chunk_heap->pool, page_to_phys(sg_page(sg)),
sg->length);
}
chunk_heap->allocated -= allocated_size;
diff --git a/drivers/staging/iio/Kconfig b/drivers/staging/iio/Kconfig
index 6d5b38d69578..9d7f0004d2d7 100644
--- a/drivers/staging/iio/Kconfig
+++ b/drivers/staging/iio/Kconfig
@@ -18,7 +18,8 @@ source "drivers/staging/iio/resolver/Kconfig"
source "drivers/staging/iio/trigger/Kconfig"
config IIO_DUMMY_EVGEN
- tristate
+ tristate
+ select IRQ_WORK
config IIO_SIMPLE_DUMMY
tristate "An example driver with no hardware requirements"
diff --git a/drivers/staging/iio/adc/lpc32xx_adc.c b/drivers/staging/iio/adc/lpc32xx_adc.c
index d11c54b72186..b51f237cd817 100644
--- a/drivers/staging/iio/adc/lpc32xx_adc.c
+++ b/drivers/staging/iio/adc/lpc32xx_adc.c
@@ -76,7 +76,7 @@ static int lpc32xx_read_raw(struct iio_dev *indio_dev,
if (mask == IIO_CHAN_INFO_RAW) {
mutex_lock(&indio_dev->mlock);
- clk_enable(info->clk);
+ clk_prepare_enable(info->clk);
/* Measurement setup */
__raw_writel(AD_INTERNAL | (chan->address) | AD_REFp | AD_REFm,
LPC32XX_ADC_SELECT(info->adc_base));
@@ -84,7 +84,7 @@ static int lpc32xx_read_raw(struct iio_dev *indio_dev,
__raw_writel(AD_PDN_CTRL | AD_STROBE,
LPC32XX_ADC_CTRL(info->adc_base));
wait_for_completion(&info->completion); /* set by ISR */
- clk_disable(info->clk);
+ clk_disable_unprepare(info->clk);
*val = info->value;
mutex_unlock(&indio_dev->mlock);
diff --git a/drivers/staging/iio/iio_simple_dummy_events.c b/drivers/staging/iio/iio_simple_dummy_events.c
index bfbf1c56bd22..6eb600ff7056 100644
--- a/drivers/staging/iio/iio_simple_dummy_events.c
+++ b/drivers/staging/iio/iio_simple_dummy_events.c
@@ -159,7 +159,7 @@ static irqreturn_t iio_simple_dummy_get_timestamp(int irq, void *private)
struct iio_dummy_state *st = iio_priv(indio_dev);
st->event_timestamp = iio_get_time_ns();
- return IRQ_HANDLED;
+ return IRQ_WAKE_THREAD;
}
/**
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h
index f5d741f25ffd..485ab2670918 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h
@@ -110,7 +110,6 @@ struct libcfs_ioctl_handler {
#define IOC_LIBCFS_CLEAR_DEBUG _IOWR('e', 31, long)
#define IOC_LIBCFS_MARK_DEBUG _IOWR('e', 32, long)
#define IOC_LIBCFS_MEMHOG _IOWR('e', 36, long)
-#define IOC_LIBCFS_PING_TEST _IOWR('e', 37, long)
/* lnet ioctls */
#define IOC_LIBCFS_GET_NI _IOWR('e', 50, long)
#define IOC_LIBCFS_FAIL_NID _IOWR('e', 51, long)
diff --git a/drivers/staging/lustre/lustre/libcfs/module.c b/drivers/staging/lustre/lustre/libcfs/module.c
index 07a68594c279..e7c2b26156b9 100644
--- a/drivers/staging/lustre/lustre/libcfs/module.c
+++ b/drivers/staging/lustre/lustre/libcfs/module.c
@@ -274,23 +274,6 @@ static int libcfs_ioctl_int(struct cfs_psdev_file *pfile, unsigned long cmd,
}
break;
- case IOC_LIBCFS_PING_TEST: {
- extern void (kping_client)(struct libcfs_ioctl_data *);
- void (*ping)(struct libcfs_ioctl_data *);
-
- CDEBUG(D_IOCTL, "doing %d pings to nid %s (%s)\n",
- data->ioc_count, libcfs_nid2str(data->ioc_nid),
- libcfs_nid2str(data->ioc_nid));
- ping = symbol_get(kping_client);
- if (!ping)
- CERROR("symbol_get failed\n");
- else {
- ping(data);
- symbol_put(kping_client);
- }
- return 0;
- }
-
default: {
struct libcfs_ioctl_handler *hand;
diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c
index 69b203651905..e489a3271f06 100644
--- a/drivers/staging/lustre/lustre/llite/symlink.c
+++ b/drivers/staging/lustre/lustre/llite/symlink.c
@@ -118,12 +118,20 @@ failed:
return rc;
}
-static const char *ll_follow_link(struct dentry *dentry, void **cookie)
+static void ll_put_link(void *p)
+{
+ ptlrpc_req_finished(p);
+}
+
+static const char *ll_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
struct ptlrpc_request *request = NULL;
int rc;
char *symname = NULL;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
CDEBUG(D_VFSTRACE, "VFS Op\n");
ll_inode_size_lock(inode);
@@ -135,22 +143,16 @@ static const char *ll_follow_link(struct dentry *dentry, void **cookie)
}
/* symname may contain a pointer to the request message buffer,
- * we delay request releasing until ll_put_link then.
+	 * so we delay releasing the request until the delayed call runs.
*/
- *cookie = request;
+ set_delayed_call(done, ll_put_link, request);
return symname;
}
-static void ll_put_link(struct inode *unused, void *cookie)
-{
- ptlrpc_req_finished(cookie);
-}
-
struct inode_operations ll_fast_symlink_inode_operations = {
.readlink = generic_readlink,
.setattr = ll_setattr,
- .follow_link = ll_follow_link,
- .put_link = ll_put_link,
+ .get_link = ll_get_link,
.getattr = ll_getattr,
.permission = ll_inode_permission,
.setxattr = ll_setxattr,
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
index 4b7eb33f7d01..660b8ac37ae0 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -60,10 +60,10 @@
static
int get_xattr_type(const char *name)
{
- if (!strcmp(name, POSIX_ACL_XATTR_ACCESS))
+ if (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS))
return XATTR_ACL_ACCESS_T;
- if (!strcmp(name, POSIX_ACL_XATTR_DEFAULT))
+ if (!strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT))
return XATTR_ACL_DEFAULT_T;
if (!strncmp(name, XATTR_USER_PREFIX,
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c
index f61ef669644c..a4a9a763ff02 100644
--- a/drivers/staging/lustre/lustre/obdecho/echo_client.c
+++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c
@@ -1270,6 +1270,7 @@ static int
echo_copyout_lsm(struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob)
{
struct lov_stripe_md *ulsm = _ulsm;
+ struct lov_oinfo **p;
int nob, i;
nob = offsetof(struct lov_stripe_md, lsm_oinfo[lsm->lsm_stripe_count]);
@@ -1279,9 +1280,10 @@ echo_copyout_lsm(struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob)
if (copy_to_user(ulsm, lsm, sizeof(*ulsm)))
return -EFAULT;
- for (i = 0; i < lsm->lsm_stripe_count; i++) {
- if (copy_to_user(ulsm->lsm_oinfo[i], lsm->lsm_oinfo[i],
- sizeof(lsm->lsm_oinfo[0])))
+ for (i = 0, p = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, p++) {
+ struct lov_oinfo __user *up;
+ if (get_user(up, ulsm->lsm_oinfo + i) ||
+ copy_to_user(up, *p, sizeof(struct lov_oinfo)))
return -EFAULT;
}
return 0;
@@ -1289,9 +1291,10 @@ echo_copyout_lsm(struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob)
static int
echo_copyin_lsm(struct echo_device *ed, struct lov_stripe_md *lsm,
- void *ulsm, int ulsm_nob)
+ struct lov_stripe_md __user *ulsm, int ulsm_nob)
{
struct echo_client_obd *ec = ed->ed_ec;
+ struct lov_oinfo **p;
int i;
if (ulsm_nob < sizeof(*lsm))
@@ -1306,11 +1309,10 @@ echo_copyin_lsm(struct echo_device *ed, struct lov_stripe_md *lsm,
((__u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL))
return -EINVAL;
- for (i = 0; i < lsm->lsm_stripe_count; i++) {
- if (copy_from_user(lsm->lsm_oinfo[i],
- ((struct lov_stripe_md *)ulsm)-> \
- lsm_oinfo[i],
- sizeof(lsm->lsm_oinfo[0])))
+ for (i = 0, p = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, p++) {
+ struct lov_oinfo __user *up;
+ if (get_user(up, ulsm->lsm_oinfo + i) ||
+ copy_from_user(*p, up, sizeof(struct lov_oinfo)))
return -EFAULT;
}
return 0;
diff --git a/drivers/staging/wilc1000/coreconfigurator.c b/drivers/staging/wilc1000/coreconfigurator.c
index e10c6ffa698a..9568bdb6319b 100644
--- a/drivers/staging/wilc1000/coreconfigurator.c
+++ b/drivers/staging/wilc1000/coreconfigurator.c
@@ -13,12 +13,8 @@
#include "wilc_wlan.h"
#include <linux/errno.h>
#include <linux/slab.h>
-#include <linux/etherdevice.h>
#define TAG_PARAM_OFFSET (MAC_HDR_LEN + TIME_STAMP_LEN + \
BEACON_INTERVAL_LEN + CAP_INFO_LEN)
-#define ADDR1 4
-#define ADDR2 10
-#define ADDR3 16
/* Basic Frame Type Codes (2-bit) */
enum basic_frame_type {
@@ -175,32 +171,38 @@ static inline u8 get_from_ds(u8 *header)
return ((header[1] & 0x02) >> 1);
}
+/* This function extracts the MAC address from the 'address1' field of */
+/* the MAC header and copies it into the allocated 'addr' variable. */
+static inline void get_address1(u8 *pu8msa, u8 *addr)
+{
+ memcpy(addr, pu8msa + 4, 6);
+}
+
+/* This function extracts the MAC address from the 'address2' field of */
+/* the MAC header and copies it into the allocated 'addr' variable. */
+static inline void get_address2(u8 *pu8msa, u8 *addr)
+{
+ memcpy(addr, pu8msa + 10, 6);
+}
+
+/* This function extracts the MAC address from the 'address3' field of */
+/* the MAC header and copies it into the allocated 'addr' variable. */
+static inline void get_address3(u8 *pu8msa, u8 *addr)
+{
+ memcpy(addr, pu8msa + 16, 6);
+}
+
/* This function extracts the BSSID from the incoming WLAN packet based on */
-/* the 'from ds' bit, and updates the MAC Address in the allocated 'data' */
+/* the 'from ds' bit, and updates the MAC Address in the allocated 'addr' */
/* variable. */
static inline void get_BSSID(u8 *data, u8 *bssid)
{
if (get_from_ds(data) == 1)
- /*
- * Extract the MAC Address in 'address2' field of the MAC
- * header and update the MAC Address in the allocated 'data'
- * variable.
- */
- ether_addr_copy(data, bssid + ADDR2);
+ get_address2(data, bssid);
else if (get_to_ds(data) == 1)
- /*
- * Extract the MAC Address in 'address1' field of the MAC
- * header and update the MAC Address in the allocated 'data'
- * variable.
- */
- ether_addr_copy(data, bssid + ADDR1);
+ get_address1(data, bssid);
else
- /*
- * Extract the MAC Address in 'address3' field of the MAC
- * header and update the MAC Address in the allocated 'data'
- * variable.
- */
- ether_addr_copy(data, bssid + ADDR3);
+ get_address3(data, bssid);
}
/* This function extracts the SSID from a beacon/probe response frame */
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 342a07c58d89..72204fbf2bb1 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -4074,6 +4074,17 @@ reject:
return iscsit_add_reject(conn, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf);
}
+static bool iscsi_target_check_conn_state(struct iscsi_conn *conn)
+{
+ bool ret;
+
+ spin_lock_bh(&conn->state_lock);
+ ret = (conn->conn_state != TARG_CONN_STATE_LOGGED_IN);
+ spin_unlock_bh(&conn->state_lock);
+
+ return ret;
+}
+
int iscsi_target_rx_thread(void *arg)
{
int ret, rc;
@@ -4091,7 +4102,7 @@ int iscsi_target_rx_thread(void *arg)
* incoming iscsi/tcp socket I/O, and/or failing the connection.
*/
rc = wait_for_completion_interruptible(&conn->rx_login_comp);
- if (rc < 0)
+ if (rc < 0 || iscsi_target_check_conn_state(conn))
return 0;
if (conn->conn_transport->transport_type == ISCSI_INFINIBAND) {
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index 5c964c09c89f..9fc9117d0f22 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -388,6 +388,7 @@ err:
if (login->login_complete) {
if (conn->rx_thread && conn->rx_thread_active) {
send_sig(SIGINT, conn->rx_thread, 1);
+ complete(&conn->rx_login_comp);
kthread_stop(conn->rx_thread);
}
if (conn->tx_thread && conn->tx_thread_active) {
diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
index 51d1734d5390..2cbea2af7cd0 100644
--- a/drivers/target/iscsi/iscsi_target_parameters.c
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -208,7 +208,7 @@ int iscsi_create_default_params(struct iscsi_param_list **param_list_ptr)
if (!pl) {
pr_err("Unable to allocate memory for"
" struct iscsi_param_list.\n");
- return -1 ;
+ return -ENOMEM;
}
INIT_LIST_HEAD(&pl->param_list);
INIT_LIST_HEAD(&pl->extra_response_list);
@@ -578,7 +578,7 @@ int iscsi_copy_param_list(
param_list = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL);
if (!param_list) {
pr_err("Unable to allocate memory for struct iscsi_param_list.\n");
- return -1;
+ return -ENOMEM;
}
INIT_LIST_HEAD(&param_list->param_list);
INIT_LIST_HEAD(&param_list->extra_response_list);
@@ -629,7 +629,7 @@ int iscsi_copy_param_list(
err_out:
iscsi_release_param_list(param_list);
- return -1;
+ return -ENOMEM;
}
static void iscsi_release_extra_responses(struct iscsi_param_list *param_list)
@@ -729,7 +729,7 @@ static int iscsi_add_notunderstood_response(
if (!extra_response) {
pr_err("Unable to allocate memory for"
" struct iscsi_extra_response.\n");
- return -1;
+ return -ENOMEM;
}
INIT_LIST_HEAD(&extra_response->er_list);
@@ -1370,7 +1370,7 @@ int iscsi_decode_text_input(
tmpbuf = kzalloc(length + 1, GFP_KERNEL);
if (!tmpbuf) {
pr_err("Unable to allocate %u + 1 bytes for tmpbuf.\n", length);
- return -1;
+ return -ENOMEM;
}
memcpy(tmpbuf, textbuf, length);
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 0b4b2a67d9f9..98698d875742 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -371,7 +371,8 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o
return 0;
}
-static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd, bool success)
+static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd, bool success,
+ int *post_ret)
{
unsigned char *buf, *addr;
struct scatterlist *sg;
@@ -437,7 +438,8 @@ sbc_execute_rw(struct se_cmd *cmd)
cmd->data_direction);
}
-static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success)
+static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success,
+ int *post_ret)
{
struct se_device *dev = cmd->se_dev;
@@ -447,8 +449,10 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success)
* sent to the backend driver.
*/
spin_lock_irq(&cmd->t_state_lock);
- if ((cmd->transport_state & CMD_T_SENT) && !cmd->scsi_status)
+ if ((cmd->transport_state & CMD_T_SENT) && !cmd->scsi_status) {
cmd->se_cmd_flags |= SCF_COMPARE_AND_WRITE_POST;
+ *post_ret = 1;
+ }
spin_unlock_irq(&cmd->t_state_lock);
/*
@@ -460,7 +464,8 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success)
return TCM_NO_SENSE;
}
-static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool success)
+static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool success,
+ int *post_ret)
{
struct se_device *dev = cmd->se_dev;
struct scatterlist *write_sg = NULL, *sg;
@@ -556,11 +561,11 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes
if (block_size < PAGE_SIZE) {
sg_set_page(&write_sg[i], m.page, block_size,
- block_size);
+ m.piter.sg->offset + block_size);
} else {
sg_miter_next(&m);
sg_set_page(&write_sg[i], m.page, block_size,
- 0);
+ m.piter.sg->offset);
}
len -= block_size;
i++;
diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c
index 273c72b2b83d..81a6b3e07687 100644
--- a/drivers/target/target_core_stat.c
+++ b/drivers/target/target_core_stat.c
@@ -246,7 +246,7 @@ static ssize_t target_stat_lu_prod_show(struct config_item *item, char *page)
char str[sizeof(dev->t10_wwn.model)+1];
/* scsiLuProductId */
- for (i = 0; i < sizeof(dev->t10_wwn.vendor); i++)
+ for (i = 0; i < sizeof(dev->t10_wwn.model); i++)
str[i] = ISPRINT(dev->t10_wwn.model[i]) ?
dev->t10_wwn.model[i] : ' ';
str[i] = '\0';
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 5b2820312310..28fb3016370f 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -130,6 +130,9 @@ void core_tmr_abort_task(
if (tmr->ref_task_tag != ref_tag)
continue;
+ if (!kref_get_unless_zero(&se_cmd->cmd_kref))
+ continue;
+
printk("ABORT_TASK: Found referenced %s task_tag: %llu\n",
se_cmd->se_tfo->get_fabric_name(), ref_tag);
@@ -139,13 +142,15 @@ void core_tmr_abort_task(
" skipping\n", ref_tag);
spin_unlock(&se_cmd->t_state_lock);
spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+
+ target_put_sess_cmd(se_cmd);
+
goto out;
}
se_cmd->transport_state |= CMD_T_ABORTED;
spin_unlock(&se_cmd->t_state_lock);
list_del_init(&se_cmd->se_cmd_list);
- kref_get(&se_cmd->cmd_kref);
spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
cancel_work_sync(&se_cmd->work);
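The tmr fix replaces an unconditional kref_get() with kref_get_unless_zero(), which refuses to resurrect a command whose refcount already hit zero while ABORT_TASK was walking the list. The semantics, minus the atomics the real helper uses:

    #include <stdio.h>
    #include <stdbool.h>

    struct kref {
        int refcount;   /* the kernel's is atomic; plain int keeps the sketch short */
    };

    /* Take a reference only while the object is still live (count > 0). */
    static bool get_unless_zero(struct kref *k)
    {
        if (k->refcount == 0)
            return false;
        k->refcount++;
        return true;
    }

    int main(void)
    {
        struct kref live = { 1 }, dying = { 0 };

        printf("live:  %d\n", get_unless_zero(&live));    /* 1: safe to use */
        printf("dying: %d\n", get_unless_zero(&dying));   /* 0: skip it */
        return 0;
    }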
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 5bacc7b5ed6d..4fdcee2006d1 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1658,7 +1658,7 @@ bool target_stop_cmd(struct se_cmd *cmd, unsigned long *flags)
void transport_generic_request_failure(struct se_cmd *cmd,
sense_reason_t sense_reason)
{
- int ret = 0;
+ int ret = 0, post_ret = 0;
pr_debug("-----[ Storage Engine Exception for cmd: %p ITT: 0x%08llx"
" CDB: 0x%02x\n", cmd, cmd->tag, cmd->t_task_cdb[0]);
@@ -1680,7 +1680,7 @@ void transport_generic_request_failure(struct se_cmd *cmd,
*/
if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) &&
cmd->transport_complete_callback)
- cmd->transport_complete_callback(cmd, false);
+ cmd->transport_complete_callback(cmd, false, &post_ret);
switch (sense_reason) {
case TCM_NON_EXISTENT_LUN:
@@ -2068,11 +2068,13 @@ static void target_complete_ok_work(struct work_struct *work)
*/
if (cmd->transport_complete_callback) {
sense_reason_t rc;
+ bool caw = (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE);
+ bool zero_dl = !(cmd->data_length);
+ int post_ret = 0;
- rc = cmd->transport_complete_callback(cmd, true);
- if (!rc && !(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE_POST)) {
- if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) &&
- !cmd->data_length)
+ rc = cmd->transport_complete_callback(cmd, true, &post_ret);
+ if (!rc && !post_ret) {
+ if (caw && zero_dl)
goto queue_rsp;
return;
@@ -2507,23 +2509,24 @@ out:
EXPORT_SYMBOL(target_get_sess_cmd);
static void target_release_cmd_kref(struct kref *kref)
- __releases(&se_cmd->se_sess->sess_cmd_lock)
{
struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref);
struct se_session *se_sess = se_cmd->se_sess;
+ unsigned long flags;
+ spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
if (list_empty(&se_cmd->se_cmd_list)) {
- spin_unlock(&se_sess->sess_cmd_lock);
+ spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
se_cmd->se_tfo->release_cmd(se_cmd);
return;
}
if (se_sess->sess_tearing_down && se_cmd->cmd_wait_set) {
- spin_unlock(&se_sess->sess_cmd_lock);
+ spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
complete(&se_cmd->cmd_wait_comp);
return;
}
list_del(&se_cmd->se_cmd_list);
- spin_unlock(&se_sess->sess_cmd_lock);
+ spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
se_cmd->se_tfo->release_cmd(se_cmd);
}
@@ -2539,8 +2542,7 @@ int target_put_sess_cmd(struct se_cmd *se_cmd)
se_cmd->se_tfo->release_cmd(se_cmd);
return 1;
}
- return kref_put_spinlock_irqsave(&se_cmd->cmd_kref, target_release_cmd_kref,
- &se_sess->sess_cmd_lock);
+ return kref_put(&se_cmd->cmd_kref, target_release_cmd_kref);
}
EXPORT_SYMBOL(target_put_sess_cmd);
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 937cebf76633..5e6d6cb348fc 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -638,7 +638,7 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data)
if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags))
return 0;
- if (!time_after(cmd->deadline, jiffies))
+ if (!time_after(jiffies, cmd->deadline))
return 0;
set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags);
@@ -1101,8 +1101,6 @@ tcmu_parse_cdb(struct se_cmd *cmd)
static const struct target_backend_ops tcmu_ops = {
.name = "user",
- .inquiry_prod = "USER",
- .inquiry_rev = TCMU_VERSION,
.owner = THIS_MODULE,
.transport_flags = TRANSPORT_FLAG_PASSTHROUGH,
.attach_hba = tcmu_attach_hba,
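The tcmu fix swaps the operands of time_after(): time_after(a, b) is true when a is later than b in wrap-safe jiffies arithmetic, so the original test could never report a command as expired. A demonstration using the same macro definition as include/linux/jiffies.h:

    #include <stdio.h>

    /* Wrap-safe jiffies comparison: true iff a is later than b. */
    #define time_after(a, b) ((long)((b) - (a)) < 0)

    int main(void)
    {
        unsigned long deadline = 1000, now = 1500;

        /* Old order asked "is the deadline after now?" -> expiry never fires. */
        printf("old: expired=%d\n", time_after(deadline, now));   /* 0 */

        /* Fixed order asks "is now after the deadline?". */
        printf("new: expired=%d\n", time_after(now, deadline));   /* 1 */
        return 0;
    }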
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index c463c89b90ef..8cc4ac64a91c 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -382,7 +382,7 @@ endmenu
config QCOM_SPMI_TEMP_ALARM
tristate "Qualcomm SPMI PMIC Temperature Alarm"
- depends on OF && (SPMI || COMPILE_TEST) && IIO
+ depends on OF && SPMI && IIO
select REGMAP_SPMI
help
This enables a thermal sysfs driver for Qualcomm plug-and-play (QPNP)
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index c8fe3cac2e0e..c5547bd711db 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -55,6 +55,7 @@
#define TEMPSENSE2_PANIC_VALUE_SHIFT 16
#define TEMPSENSE2_PANIC_VALUE_MASK 0xfff0000
+#define OCOTP_MEM0 0x0480
#define OCOTP_ANA1 0x04e0
/* The driver supports 1 passive trip point and 1 critical trip point */
@@ -64,12 +65,6 @@ enum imx_thermal_trip {
IMX_TRIP_NUM,
};
-/*
- * It defines the temperature in millicelsius for passive trip point
- * that will trigger cooling action when crossed.
- */
-#define IMX_TEMP_PASSIVE 85000
-
#define IMX_POLLING_DELAY 2000 /* millisecond */
#define IMX_PASSIVE_DELAY 1000
@@ -100,12 +95,14 @@ struct imx_thermal_data {
u32 c1, c2; /* See formula in imx_get_sensor_data() */
int temp_passive;
int temp_critical;
+ int temp_max;
int alarm_temp;
int last_temp;
bool irq_enabled;
int irq;
struct clk *thermal_clk;
const struct thermal_soc_data *socdata;
+ const char *temp_grade;
};
static void imx_set_panic_temp(struct imx_thermal_data *data,
@@ -285,10 +282,12 @@ static int imx_set_trip_temp(struct thermal_zone_device *tz, int trip,
{
struct imx_thermal_data *data = tz->devdata;
+ /* do not allow changing critical threshold */
if (trip == IMX_TRIP_CRITICAL)
return -EPERM;
- if (temp < 0 || temp > IMX_TEMP_PASSIVE)
+ /* do not allow passive to be set higher than critical */
+ if (temp < 0 || temp > data->temp_critical)
return -EINVAL;
data->temp_passive = temp;
@@ -404,17 +403,39 @@ static int imx_get_sensor_data(struct platform_device *pdev)
data->c1 = temp64;
data->c2 = n1 * data->c1 + 1000 * t1;
- /*
- * Set the default passive cooling trip point,
- * can be changed from userspace.
- */
- data->temp_passive = IMX_TEMP_PASSIVE;
+ /* use OTP for thermal grade */
+ ret = regmap_read(map, OCOTP_MEM0, &val);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to read temp grade: %d\n", ret);
+ return ret;
+ }
+
+ /* The maximum die temp is specified by the Temperature Grade */
+ switch ((val >> 6) & 0x3) {
+ case 0: /* Commercial (0 to 95C) */
+ data->temp_grade = "Commercial";
+ data->temp_max = 95000;
+ break;
+ case 1: /* Extended Commercial (-20 to 105C) */
+ data->temp_grade = "Extended Commercial";
+ data->temp_max = 105000;
+ break;
+ case 2: /* Industrial (-40 to 105C) */
+ data->temp_grade = "Industrial";
+ data->temp_max = 105000;
+ break;
+ case 3: /* Automotive (-40 to 125C) */
+ data->temp_grade = "Automotive";
+ data->temp_max = 125000;
+ break;
+ }
/*
- * The maximum die temperature set to 20 C higher than
- * IMX_TEMP_PASSIVE.
+	 * Set the critical trip point 5C below max.
+	 * Set the passive trip point 10C below max (can be changed via sysfs).
*/
- data->temp_critical = 1000 * 20 + data->temp_passive;
+ data->temp_critical = data->temp_max - (1000 * 5);
+ data->temp_passive = data->temp_max - (1000 * 10);
return 0;
}
@@ -551,6 +572,11 @@ static int imx_thermal_probe(struct platform_device *pdev)
return ret;
}
+ dev_info(&pdev->dev, "%s CPU temperature grade - max:%dC"
+ " critical:%dC passive:%dC\n", data->temp_grade,
+ data->temp_max / 1000, data->temp_critical / 1000,
+ data->temp_passive / 1000);
+
/* Enable measurements at ~ 10 Hz */
regmap_write(map, TEMPSENSE1 + REG_CLR, TEMPSENSE1_MEASURE_FREQ);
measure_freq = DIV_ROUND_UP(32768, 10); /* 10 Hz */
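The imx_thermal rework derives the trip points from the part's temperature grade, a 2-bit OTP field, instead of hard-coded constants. A sketch of the decode and the derived trips (the register layout is as described by the patch and treated here as an assumption):

    #include <stdio.h>

    /* Decode the 2-bit grade in bits [7:6] of the OTP word. */
    static int grade_to_max_mc(unsigned int otp)
    {
        switch ((otp >> 6) & 0x3) {
        case 0:  return 95000;    /* Commercial (0 to 95C) */
        case 1:  return 105000;   /* Extended Commercial (-20 to 105C) */
        case 2:  return 105000;   /* Industrial (-40 to 105C) */
        default: return 125000;   /* Automotive (-40 to 125C) */
        }
    }

    int main(void)
    {
        unsigned int otp = 2 << 6;                /* an Industrial-grade part */
        int max = grade_to_max_mc(otp);

        printf("max:%dC critical:%dC passive:%dC\n",
               max / 1000, (max - 5000) / 1000, (max - 10000) / 1000);
        return 0;
    }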
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index 42b7d4253b94..be4eedcb839a 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -964,7 +964,7 @@ void of_thermal_destroy_zones(void)
np = of_find_node_by_name(NULL, "thermal-zones");
if (!np) {
- pr_err("unable to find thermal zones\n");
+ pr_debug("unable to find thermal zones\n");
return;
}
diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c
index f0fbea386869..1246aa6fcab0 100644
--- a/drivers/thermal/power_allocator.c
+++ b/drivers/thermal/power_allocator.c
@@ -174,7 +174,6 @@ static void estimate_pid_constants(struct thermal_zone_device *tz,
/**
* pid_controller() - PID controller
* @tz: thermal zone we are operating in
- * @current_temp: the current temperature in millicelsius
* @control_temp: the target temperature in millicelsius
* @max_allocatable_power: maximum allocatable power for this thermal zone
*
@@ -191,7 +190,6 @@ static void estimate_pid_constants(struct thermal_zone_device *tz,
* Return: The power budget for the next period.
*/
static u32 pid_controller(struct thermal_zone_device *tz,
- int current_temp,
int control_temp,
u32 max_allocatable_power)
{
@@ -211,7 +209,7 @@ static u32 pid_controller(struct thermal_zone_device *tz,
true);
}
- err = control_temp - current_temp;
+ err = control_temp - tz->temperature;
err = int_to_frac(err);
/* Calculate the proportional term */
@@ -332,7 +330,6 @@ static void divvy_up_power(u32 *req_power, u32 *max_power, int num_actors,
}
static int allocate_power(struct thermal_zone_device *tz,
- int current_temp,
int control_temp)
{
struct thermal_instance *instance;
@@ -418,8 +415,7 @@ static int allocate_power(struct thermal_zone_device *tz,
i++;
}
- power_range = pid_controller(tz, current_temp, control_temp,
- max_allocatable_power);
+ power_range = pid_controller(tz, control_temp, max_allocatable_power);
divvy_up_power(weighted_req_power, max_power, num_actors,
total_weighted_req_power, power_range, granted_power,
@@ -444,8 +440,8 @@ static int allocate_power(struct thermal_zone_device *tz,
trace_thermal_power_allocator(tz, req_power, total_req_power,
granted_power, total_granted_power,
num_actors, power_range,
- max_allocatable_power, current_temp,
- control_temp - current_temp);
+ max_allocatable_power, tz->temperature,
+ control_temp - tz->temperature);
kfree(req_power);
unlock:
@@ -612,7 +608,7 @@ static void power_allocator_unbind(struct thermal_zone_device *tz)
static int power_allocator_throttle(struct thermal_zone_device *tz, int trip)
{
int ret;
- int switch_on_temp, control_temp, current_temp;
+ int switch_on_temp, control_temp;
struct power_allocator_params *params = tz->governor_data;
/*
@@ -622,15 +618,9 @@ static int power_allocator_throttle(struct thermal_zone_device *tz, int trip)
if (trip != params->trip_max_desired_temperature)
return 0;
- ret = thermal_zone_get_temp(tz, &current_temp);
- if (ret) {
- dev_warn(&tz->device, "Failed to get temperature: %d\n", ret);
- return ret;
- }
-
ret = tz->ops->get_trip_temp(tz, params->trip_switch_on,
&switch_on_temp);
- if (!ret && (current_temp < switch_on_temp)) {
+ if (!ret && (tz->temperature < switch_on_temp)) {
tz->passive = 0;
reset_pid_controller(params);
allow_maximum_power(tz);
@@ -648,7 +638,7 @@ static int power_allocator_throttle(struct thermal_zone_device *tz, int trip)
return ret;
}
- return allocate_power(tz, current_temp, control_temp);
+ return allocate_power(tz, control_temp);
}
static struct thermal_governor thermal_gov_power_allocator = {
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index 5d4ae7d705e0..13d01edc7a04 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -361,6 +361,24 @@ static irqreturn_t rcar_thermal_irq(int irq, void *data)
/*
* platform functions
*/
+static int rcar_thermal_remove(struct platform_device *pdev)
+{
+ struct rcar_thermal_common *common = platform_get_drvdata(pdev);
+ struct device *dev = &pdev->dev;
+ struct rcar_thermal_priv *priv;
+
+ rcar_thermal_for_each_priv(priv, common) {
+ if (rcar_has_irq_support(priv))
+ rcar_thermal_irq_disable(priv);
+ thermal_zone_device_unregister(priv->zone);
+ }
+
+ pm_runtime_put(dev);
+ pm_runtime_disable(dev);
+
+ return 0;
+}
+
static int rcar_thermal_probe(struct platform_device *pdev)
{
struct rcar_thermal_common *common;
@@ -377,6 +395,8 @@ static int rcar_thermal_probe(struct platform_device *pdev)
if (!common)
return -ENOMEM;
+ platform_set_drvdata(pdev, common);
+
INIT_LIST_HEAD(&common->head);
spin_lock_init(&common->lock);
common->dev = dev;
@@ -454,43 +474,16 @@ static int rcar_thermal_probe(struct platform_device *pdev)
rcar_thermal_common_write(common, ENR, enr_bits);
}
- platform_set_drvdata(pdev, common);
-
dev_info(dev, "%d sensor probed\n", i);
return 0;
error_unregister:
- rcar_thermal_for_each_priv(priv, common) {
- if (rcar_has_irq_support(priv))
- rcar_thermal_irq_disable(priv);
- thermal_zone_device_unregister(priv->zone);
- }
-
- pm_runtime_put(dev);
- pm_runtime_disable(dev);
+ rcar_thermal_remove(pdev);
return ret;
}
-static int rcar_thermal_remove(struct platform_device *pdev)
-{
- struct rcar_thermal_common *common = platform_get_drvdata(pdev);
- struct device *dev = &pdev->dev;
- struct rcar_thermal_priv *priv;
-
- rcar_thermal_for_each_priv(priv, common) {
- if (rcar_has_irq_support(priv))
- rcar_thermal_irq_disable(priv);
- thermal_zone_device_unregister(priv->zone);
- }
-
- pm_runtime_put(dev);
- pm_runtime_disable(dev);
-
- return 0;
-}
-
static const struct of_device_id rcar_thermal_dt_ids[] = {
{ .compatible = "renesas,rcar-thermal", },
{},
diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c
index 9787e8aa509f..e845841ab036 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -1,6 +1,9 @@
/*
* Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd
*
+ * Copyright (c) 2015, Fuzhou Rockchip Electronics Co., Ltd
+ * Caesar Wang <wxt@rock-chips.com>
+ *
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
@@ -45,17 +48,50 @@ enum tshut_polarity {
};
/**
- * The system has three Temperature Sensors. channel 0 is reserved,
- * channel 1 is for CPU, and channel 2 is for GPU.
+ * The system has two temperature sensors:
+ * sensor0 is for the CPU, and sensor1 is for the GPU.
*/
enum sensor_id {
- SENSOR_CPU = 1,
+ SENSOR_CPU = 0,
SENSOR_GPU,
};
+/**
+* The conversion table holds the ADC values and their temperatures.
+* ADC_DECREMENT: the ADC value decrements through the table (e.g. v2_code_table).
+* ADC_INCREMENT: the ADC value increments through the table (e.g. v3_code_table).
+*/
+enum adc_sort_mode {
+ ADC_DECREMENT = 0,
+ ADC_INCREMENT,
+};
+
+/**
+ * Rockchip SoCs have at most two temperature sensors:
+ * one for the CPU and one for the GPU.
+ */
+#define SOC_MAX_SENSORS 2
+
+struct chip_tsadc_table {
+ const struct tsadc_table *id;
+
+	/* the size of the table array */
+ unsigned int length;
+
+	/* mask applied to the raw ADC data */
+ u32 data_mask;
+
+	/* whether the ADC values in the table increment or decrement */
+ enum adc_sort_mode mode;
+};
+
struct rockchip_tsadc_chip {
+ /* The sensor id of chip correspond to the ADC channel */
+ int chn_id[SOC_MAX_SENSORS];
+ int chn_num;
+
/* The hardware-controlled tshut property */
- long tshut_temp;
+ int tshut_temp;
enum tshut_mode tshut_mode;
enum tshut_polarity tshut_polarity;
@@ -65,37 +101,40 @@ struct rockchip_tsadc_chip {
void (*control)(void __iomem *reg, bool on);
/* Per-sensor methods */
- int (*get_temp)(int chn, void __iomem *reg, int *temp);
- void (*set_tshut_temp)(int chn, void __iomem *reg, long temp);
+ int (*get_temp)(struct chip_tsadc_table table,
+ int chn, void __iomem *reg, int *temp);
+ void (*set_tshut_temp)(struct chip_tsadc_table table,
+ int chn, void __iomem *reg, int temp);
void (*set_tshut_mode)(int chn, void __iomem *reg, enum tshut_mode m);
+
+ /* Per-chip conversion table */
+ struct chip_tsadc_table table;
};
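The structs above centralize everything that differs per SoC — channel mapping, conversion table, and method pointers — so the core code stays generic. A minimal, compilable userspace illustration of the same pattern follows; every name in it is invented for the demo, not taken from the driver.

#include <stdio.h>

/* one conversion-table entry: ADC code and temperature in millicelsius */
struct code_temp { unsigned code; int temp; };

/* everything that differs per SoC lives in one const descriptor */
struct chip_desc {
	const struct code_temp *table;
	unsigned table_len;
	int chn_id[2];		/* logical sensor index -> ADC channel */
	int chn_num;
	int (*get_temp)(const struct chip_desc *d, int chn, int *temp);
};

static const struct code_temp demo_table[] = {
	{ 3800, -40000 }, { 3421, 125000 },
};

static int demo_get_temp(const struct chip_desc *d, int chn, int *temp)
{
	/* a real driver would read the channel's data register here */
	*temp = d->table[0].temp + chn * 1000;
	return 0;
}

static const struct chip_desc demo_chip = {
	.table		= demo_table,
	.table_len	= 2,
	.chn_id		= { 1, 2 },	/* CPU on channel 1, GPU on channel 2 */
	.chn_num	= 2,
	.get_temp	= demo_get_temp,
};

int main(void)
{
	int i, temp;

	/* generic core code: iterate sensors, dispatch via the descriptor */
	for (i = 0; i < demo_chip.chn_num; i++) {
		demo_chip.get_temp(&demo_chip, demo_chip.chn_id[i], &temp);
		printf("sensor %d (channel %d): %d mC\n",
		       i, demo_chip.chn_id[i], temp);
	}
	return 0;
}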
struct rockchip_thermal_sensor {
struct rockchip_thermal_data *thermal;
struct thermal_zone_device *tzd;
- enum sensor_id id;
+ int id;
};
-#define NUM_SENSORS 2 /* Ignore unused sensor 0 */
-
struct rockchip_thermal_data {
const struct rockchip_tsadc_chip *chip;
struct platform_device *pdev;
struct reset_control *reset;
- struct rockchip_thermal_sensor sensors[NUM_SENSORS];
+ struct rockchip_thermal_sensor sensors[SOC_MAX_SENSORS];
struct clk *clk;
struct clk *pclk;
void __iomem *regs;
- long tshut_temp;
+ int tshut_temp;
enum tshut_mode tshut_mode;
enum tshut_polarity tshut_polarity;
};
-/* TSADC V2 Sensor info define: */
+/* TSADC Sensor info define: */
#define TSADCV2_AUTO_CON 0x04
#define TSADCV2_INT_EN 0x08
#define TSADCV2_INT_PD 0x0c
@@ -117,6 +156,8 @@ struct rockchip_thermal_data {
#define TSADCV2_INT_PD_CLEAR_MASK ~BIT(8)
#define TSADCV2_DATA_MASK 0xfff
+#define TSADCV3_DATA_MASK 0x3ff
+
#define TSADCV2_HIGHT_INT_DEBOUNCE_COUNT 4
#define TSADCV2_HIGHT_TSHUT_DEBOUNCE_COUNT 4
#define TSADCV2_AUTO_PERIOD_TIME 250 /* msec */
@@ -124,7 +165,7 @@ struct rockchip_thermal_data {
struct tsadc_table {
u32 code;
- long temp;
+ int temp;
};
static const struct tsadc_table v2_code_table[] = {
@@ -165,21 +206,61 @@ static const struct tsadc_table v2_code_table[] = {
{3421, 125000},
};
-static u32 rk_tsadcv2_temp_to_code(long temp)
+static const struct tsadc_table v3_code_table[] = {
+ {0, -40000},
+ {106, -40000},
+ {108, -35000},
+ {110, -30000},
+ {112, -25000},
+ {114, -20000},
+ {116, -15000},
+ {118, -10000},
+ {120, -5000},
+ {122, 0},
+ {124, 5000},
+ {126, 10000},
+ {128, 15000},
+ {130, 20000},
+ {132, 25000},
+ {134, 30000},
+ {136, 35000},
+ {138, 40000},
+ {140, 45000},
+ {142, 50000},
+ {144, 55000},
+ {146, 60000},
+ {148, 65000},
+ {150, 70000},
+ {152, 75000},
+ {154, 80000},
+ {156, 85000},
+ {158, 90000},
+ {160, 95000},
+ {162, 100000},
+ {163, 105000},
+ {165, 110000},
+ {167, 115000},
+ {169, 120000},
+ {171, 125000},
+ {TSADCV3_DATA_MASK, 125000},
+};
+
+static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table table,
+ int temp)
{
int high, low, mid;
low = 0;
- high = ARRAY_SIZE(v2_code_table) - 1;
+ high = table.length - 1;
mid = (high + low) / 2;
- if (temp < v2_code_table[low].temp || temp > v2_code_table[high].temp)
+ if (temp < table.id[low].temp || temp > table.id[high].temp)
return 0;
while (low <= high) {
- if (temp == v2_code_table[mid].temp)
- return v2_code_table[mid].code;
- else if (temp < v2_code_table[mid].temp)
+ if (temp == table.id[mid].temp)
+ return table.id[mid].code;
+ else if (temp < table.id[mid].temp)
high = mid - 1;
else
low = mid + 1;
@@ -189,29 +270,54 @@ static u32 rk_tsadcv2_temp_to_code(long temp)
return 0;
}
-static int rk_tsadcv2_code_to_temp(u32 code, int *temp)
+static int rk_tsadcv2_code_to_temp(struct chip_tsadc_table table, u32 code,
+ int *temp)
{
unsigned int low = 1;
- unsigned int high = ARRAY_SIZE(v2_code_table) - 1;
+ unsigned int high = table.length - 1;
unsigned int mid = (low + high) / 2;
unsigned int num;
unsigned long denom;
- BUILD_BUG_ON(ARRAY_SIZE(v2_code_table) < 2);
-
- code &= TSADCV2_DATA_MASK;
- if (code < v2_code_table[high].code)
- return -EAGAIN; /* Incorrect reading */
-
- while (low <= high) {
- if (code >= v2_code_table[mid].code &&
- code < v2_code_table[mid - 1].code)
- break;
- else if (code < v2_code_table[mid].code)
- low = mid + 1;
- else
- high = mid - 1;
- mid = (low + high) / 2;
+ WARN_ON(table.length < 2);
+
+ switch (table.mode) {
+ case ADC_DECREMENT:
+ code &= table.data_mask;
+ if (code < table.id[high].code)
+ return -EAGAIN; /* Incorrect reading */
+
+ while (low <= high) {
+ if (code >= table.id[mid].code &&
+ code < table.id[mid - 1].code)
+ break;
+ else if (code < table.id[mid].code)
+ low = mid + 1;
+ else
+ high = mid - 1;
+
+ mid = (low + high) / 2;
+ }
+ break;
+ case ADC_INCREMENT:
+ code &= table.data_mask;
+ if (code < table.id[low].code)
+ return -EAGAIN; /* Incorrect reading */
+
+ while (low <= high) {
+ if (code >= table.id[mid - 1].code &&
+ code < table.id[mid].code)
+ break;
+ else if (code > table.id[mid].code)
+ low = mid + 1;
+ else
+ high = mid - 1;
+
+ mid = (low + high) / 2;
+ }
+ break;
+ default:
+ pr_err("Invalid conversion table\n");
}
/*
@@ -220,24 +326,28 @@ static int rk_tsadcv2_code_to_temp(u32 code, int *temp)
* temperature between 2 table entries is linear and interpolate
* to produce less granular result.
*/
- num = v2_code_table[mid].temp - v2_code_table[mid - 1].temp;
- num *= v2_code_table[mid - 1].code - code;
- denom = v2_code_table[mid - 1].code - v2_code_table[mid].code;
- *temp = v2_code_table[mid - 1].temp + (num / denom);
+ num = table.id[mid].temp - table.id[mid - 1].temp;
+ num *= abs(table.id[mid - 1].code - code);
+ denom = abs(table.id[mid - 1].code - table.id[mid].code);
+ *temp = table.id[mid - 1].temp + (num / denom);
return 0;
}
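For reference, a standalone sketch of the lookup the hunk above implements, mirroring the ADC_INCREMENT branch: binary-search the bracketing pair of entries, then linearly interpolate between them. Table values and names are illustrative only.

#include <stdio.h>

struct entry { unsigned code; int temp; };	/* temp in millicelsius */

static const struct entry tbl[] = {
	{106, -40000}, {122, 0}, {132, 25000}, {162, 100000}, {171, 125000},
};

static int code_to_temp(unsigned code, int *temp)
{
	unsigned low = 1, high = sizeof(tbl) / sizeof(tbl[0]) - 1;
	unsigned mid = (low + high) / 2;

	if (code < tbl[low].code)
		return -1;		/* below table range: bad reading */

	/* find mid so that tbl[mid-1].code <= code < tbl[mid].code */
	while (low <= high) {
		if (code >= tbl[mid - 1].code && code < tbl[mid].code)
			break;
		else if (code > tbl[mid].code)
			low = mid + 1;
		else
			high = mid - 1;
		mid = (low + high) / 2;
	}

	/* linear interpolation between tbl[mid-1] and tbl[mid] */
	*temp = tbl[mid - 1].temp +
		(tbl[mid].temp - tbl[mid - 1].temp) *
		(int)(code - tbl[mid - 1].code) /
		(int)(tbl[mid].code - tbl[mid - 1].code);
	return 0;
}

int main(void)
{
	int t;

	if (!code_to_temp(127, &t))
		printf("code 127 -> %d mC\n", t);	/* 12500 mC */
	return 0;
}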
/**
- * rk_tsadcv2_initialize - initialize TASDC Controller
- * (1) Set TSADCV2_AUTO_PERIOD, configure the interleave between
- * every two accessing of TSADC in normal operation.
- * (2) Set TSADCV2_AUTO_PERIOD_HT, configure the interleave between
- * every two accessing of TSADC after the temperature is higher
- * than COM_SHUT or COM_INT.
- * (3) Set TSADCV2_HIGH_INT_DEBOUNCE and TSADC_HIGHT_TSHUT_DEBOUNCE,
- * if the temperature is higher than COMP_INT or COMP_SHUT for
- * "debounce" times, TSADC controller will generate interrupt or TSHUT.
+ * rk_tsadcv2_initialize - initialize TSADC controller.
+ *
+ * (1) Set TSADCV2_AUTO_PERIOD:
+ * Configure the interval between every two accesses of
+ * TSADC in normal operation.
+ *
+ * (2) Set TSADCV2_AUTO_PERIOD_HT:
+ * Configure the interval between every two accesses of
+ * TSADC after the temperature is higher than COM_SHUT or COM_INT.
+ *
+ * (3) Set TSADCV2_HIGHT_INT_DEBOUNCE_COUNT and TSADCV2_HIGHT_TSHUT_DEBOUNCE_COUNT:
+ * If the temperature is higher than COMP_INT or COMP_SHUT for
+ * "debounce" times, the TSADC controller will generate an interrupt or TSHUT.
*/
static void rk_tsadcv2_initialize(void __iomem *regs,
enum tshut_polarity tshut_polarity)
@@ -279,20 +389,22 @@ static void rk_tsadcv2_control(void __iomem *regs, bool enable)
writel_relaxed(val, regs + TSADCV2_AUTO_CON);
}
-static int rk_tsadcv2_get_temp(int chn, void __iomem *regs, int *temp)
+static int rk_tsadcv2_get_temp(struct chip_tsadc_table table,
+ int chn, void __iomem *regs, int *temp)
{
u32 val;
val = readl_relaxed(regs + TSADCV2_DATA(chn));
- return rk_tsadcv2_code_to_temp(val, temp);
+ return rk_tsadcv2_code_to_temp(table, val, temp);
}
-static void rk_tsadcv2_tshut_temp(int chn, void __iomem *regs, long temp)
+static void rk_tsadcv2_tshut_temp(struct chip_tsadc_table table,
+ int chn, void __iomem *regs, int temp)
{
u32 tshut_value, val;
- tshut_value = rk_tsadcv2_temp_to_code(temp);
+ tshut_value = rk_tsadcv2_temp_to_code(table, temp);
writel_relaxed(tshut_value, regs + TSADCV2_COMP_SHUT(chn));
/* TSHUT will be valid */
@@ -318,6 +430,10 @@ static void rk_tsadcv2_tshut_mode(int chn, void __iomem *regs,
}
static const struct rockchip_tsadc_chip rk3288_tsadc_data = {
+ .chn_id[SENSOR_CPU] = 1, /* cpu sensor is channel 1 */
+ .chn_id[SENSOR_GPU] = 2, /* gpu sensor is channel 2 */
+ .chn_num = 2, /* two channels for tsadc */
+
+ .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO to PMIC */
.tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
.tshut_temp = 95000,
@@ -328,6 +444,37 @@ static const struct rockchip_tsadc_chip rk3288_tsadc_data = {
.get_temp = rk_tsadcv2_get_temp,
.set_tshut_temp = rk_tsadcv2_tshut_temp,
.set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+ .table = {
+ .id = v2_code_table,
+ .length = ARRAY_SIZE(v2_code_table),
+ .data_mask = TSADCV2_DATA_MASK,
+ .mode = ADC_DECREMENT,
+ },
+};
+
+static const struct rockchip_tsadc_chip rk3368_tsadc_data = {
+ .chn_id[SENSOR_CPU] = 0, /* cpu sensor is channel 0 */
+ .chn_id[SENSOR_GPU] = 1, /* gpu sensor is channel 1 */
+ .chn_num = 2, /* two channels for tsadc */
+
+ .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO to PMIC */
+ .tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
+ .tshut_temp = 95000,
+
+ .initialize = rk_tsadcv2_initialize,
+ .irq_ack = rk_tsadcv2_irq_ack,
+ .control = rk_tsadcv2_control,
+ .get_temp = rk_tsadcv2_get_temp,
+ .set_tshut_temp = rk_tsadcv2_tshut_temp,
+ .set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+ .table = {
+ .id = v3_code_table,
+ .length = ARRAY_SIZE(v3_code_table),
+ .data_mask = TSADCV3_DATA_MASK,
+ .mode = ADC_INCREMENT,
+ },
};
static const struct of_device_id of_rockchip_thermal_match[] = {
@@ -335,6 +482,10 @@ static const struct of_device_id of_rockchip_thermal_match[] = {
.compatible = "rockchip,rk3288-tsadc",
.data = (void *)&rk3288_tsadc_data,
},
+ {
+ .compatible = "rockchip,rk3368-tsadc",
+ .data = (void *)&rk3368_tsadc_data,
+ },
{ /* end */ },
};
MODULE_DEVICE_TABLE(of, of_rockchip_thermal_match);
@@ -357,7 +508,7 @@ static irqreturn_t rockchip_thermal_alarm_irq_thread(int irq, void *dev)
thermal->chip->irq_ack(thermal->regs);
- for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++)
+ for (i = 0; i < thermal->chip->chn_num; i++)
thermal_zone_device_update(thermal->sensors[i].tzd);
return IRQ_HANDLED;
@@ -370,7 +521,8 @@ static int rockchip_thermal_get_temp(void *_sensor, int *out_temp)
const struct rockchip_tsadc_chip *tsadc = sensor->thermal->chip;
int retval;
- retval = tsadc->get_temp(sensor->id, thermal->regs, out_temp);
+ retval = tsadc->get_temp(tsadc->table,
+ sensor->id, thermal->regs, out_temp);
dev_dbg(&thermal->pdev->dev, "sensor %d - temp: %d, retval: %d\n",
sensor->id, *out_temp, retval);
@@ -389,7 +541,7 @@ static int rockchip_configure_from_dt(struct device *dev,
if (of_property_read_u32(np, "rockchip,hw-tshut-temp", &shut_temp)) {
dev_warn(dev,
- "Missing tshut temp property, using default %ld\n",
+ "Missing tshut temp property, using default %d\n",
thermal->chip->tshut_temp);
thermal->tshut_temp = thermal->chip->tshut_temp;
} else {
@@ -397,7 +549,7 @@ static int rockchip_configure_from_dt(struct device *dev,
}
if (thermal->tshut_temp > INT_MAX) {
- dev_err(dev, "Invalid tshut temperature specified: %ld\n",
+ dev_err(dev, "Invalid tshut temperature specified: %d\n",
thermal->tshut_temp);
return -ERANGE;
}
@@ -442,13 +594,14 @@ static int
rockchip_thermal_register_sensor(struct platform_device *pdev,
struct rockchip_thermal_data *thermal,
struct rockchip_thermal_sensor *sensor,
- enum sensor_id id)
+ int id)
{
const struct rockchip_tsadc_chip *tsadc = thermal->chip;
int error;
tsadc->set_tshut_mode(id, thermal->regs, thermal->tshut_mode);
- tsadc->set_tshut_temp(id, thermal->regs, thermal->tshut_temp);
+ tsadc->set_tshut_temp(tsadc->table, id, thermal->regs,
+ thermal->tshut_temp);
sensor->thermal = thermal;
sensor->id = id;
@@ -481,7 +634,7 @@ static int rockchip_thermal_probe(struct platform_device *pdev)
const struct of_device_id *match;
struct resource *res;
int irq;
- int i;
+ int i, j;
int error;
match = of_match_node(of_rockchip_thermal_match, np);
@@ -556,22 +709,19 @@ static int rockchip_thermal_probe(struct platform_device *pdev)
thermal->chip->initialize(thermal->regs, thermal->tshut_polarity);
- error = rockchip_thermal_register_sensor(pdev, thermal,
- &thermal->sensors[0],
- SENSOR_CPU);
- if (error) {
- dev_err(&pdev->dev,
- "failed to register CPU thermal sensor: %d\n", error);
- goto err_disable_pclk;
- }
-
- error = rockchip_thermal_register_sensor(pdev, thermal,
- &thermal->sensors[1],
- SENSOR_GPU);
- if (error) {
- dev_err(&pdev->dev,
- "failed to register GPU thermal sensor: %d\n", error);
- goto err_unregister_cpu_sensor;
+ for (i = 0; i < thermal->chip->chn_num; i++) {
+ error = rockchip_thermal_register_sensor(pdev, thermal,
+ &thermal->sensors[i],
+ thermal->chip->chn_id[i]);
+ if (error) {
+ dev_err(&pdev->dev,
+ "failed to register sensor[%d] : error = %d\n",
+ i, error);
+ for (j = 0; j < i; j++)
+ thermal_zone_of_sensor_unregister(&pdev->dev,
+ thermal->sensors[j].tzd);
+ goto err_disable_pclk;
+ }
}
error = devm_request_threaded_irq(&pdev->dev, irq, NULL,
@@ -581,22 +731,23 @@ static int rockchip_thermal_probe(struct platform_device *pdev)
if (error) {
dev_err(&pdev->dev,
"failed to request tsadc irq: %d\n", error);
- goto err_unregister_gpu_sensor;
+ goto err_unregister_sensor;
}
thermal->chip->control(thermal->regs, true);
- for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++)
+ for (i = 0; i < thermal->chip->chn_num; i++)
rockchip_thermal_toggle_sensor(&thermal->sensors[i], true);
platform_set_drvdata(pdev, thermal);
return 0;
-err_unregister_gpu_sensor:
- thermal_zone_of_sensor_unregister(&pdev->dev, thermal->sensors[1].tzd);
-err_unregister_cpu_sensor:
- thermal_zone_of_sensor_unregister(&pdev->dev, thermal->sensors[0].tzd);
+err_unregister_sensor:
+ while (i--)
+ thermal_zone_of_sensor_unregister(&pdev->dev,
+ thermal->sensors[i].tzd);
+
err_disable_pclk:
clk_disable_unprepare(thermal->pclk);
err_disable_clk:
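The registration loop and its err_unregister_sensor path above follow the standard partial-unwind idiom: on failure at index i, undo exactly the registrations 0..i-1 already made. A generic sketch of that idiom, with hypothetical register_one()/unregister_one() helpers:

static int register_all(struct item *items, int n)
{
	int i, ret;

	for (i = 0; i < n; i++) {
		ret = register_one(&items[i]);	/* hypothetical */
		if (ret)
			goto unwind;
	}
	return 0;

unwind:
	while (i--)		/* undo items[i-1] down to items[0] */
		unregister_one(&items[i]);
	return ret;
}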
@@ -610,7 +761,7 @@ static int rockchip_thermal_remove(struct platform_device *pdev)
struct rockchip_thermal_data *thermal = platform_get_drvdata(pdev);
int i;
- for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) {
+ for (i = 0; i < thermal->chip->chn_num; i++) {
struct rockchip_thermal_sensor *sensor = &thermal->sensors[i];
rockchip_thermal_toggle_sensor(sensor, false);
@@ -631,7 +782,7 @@ static int __maybe_unused rockchip_thermal_suspend(struct device *dev)
struct rockchip_thermal_data *thermal = platform_get_drvdata(pdev);
int i;
- for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++)
+ for (i = 0; i < thermal->chip->chn_num; i++)
rockchip_thermal_toggle_sensor(&thermal->sensors[i], false);
thermal->chip->control(thermal->regs, false);
@@ -663,18 +814,19 @@ static int __maybe_unused rockchip_thermal_resume(struct device *dev)
thermal->chip->initialize(thermal->regs, thermal->tshut_polarity);
- for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) {
- enum sensor_id id = thermal->sensors[i].id;
+ for (i = 0; i < thermal->chip->chn_num; i++) {
+ int id = thermal->sensors[i].id;
thermal->chip->set_tshut_mode(id, thermal->regs,
thermal->tshut_mode);
- thermal->chip->set_tshut_temp(id, thermal->regs,
+ thermal->chip->set_tshut_temp(thermal->chip->table,
+ id, thermal->regs,
thermal->tshut_temp);
}
thermal->chip->control(thermal->regs, true);
- for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++)
+ for (i = 0; i < thermal->chip->chn_num; i++)
rockchip_thermal_toggle_sensor(&thermal->sensors[i], true);
pinctrl_pm_select_default_state(dev);
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 13844261cd5f..e49c2bce551d 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -169,7 +169,7 @@ static inline int tty_copy_to_user(struct tty_struct *tty,
{
struct n_tty_data *ldata = tty->disc_data;
- tty_audit_add_data(tty, to, n, ldata->icanon);
+ tty_audit_add_data(tty, from, n, ldata->icanon);
return copy_to_user(to, from, n);
}
@@ -2054,13 +2054,13 @@ static int canon_copy_from_read_buf(struct tty_struct *tty,
size_t eol;
size_t tail;
int ret, found = 0;
- bool eof_push = 0;
/* N.B. avoid overrun if nr == 0 */
- n = min(*nr, smp_load_acquire(&ldata->canon_head) - ldata->read_tail);
- if (!n)
+ if (!*nr)
return 0;
+ n = min(*nr + 1, smp_load_acquire(&ldata->canon_head) - ldata->read_tail);
+
tail = ldata->read_tail & (N_TTY_BUF_SIZE - 1);
size = min_t(size_t, tail + n, N_TTY_BUF_SIZE);
@@ -2081,12 +2081,11 @@ static int canon_copy_from_read_buf(struct tty_struct *tty,
n = eol - tail;
if (n > N_TTY_BUF_SIZE)
n += N_TTY_BUF_SIZE;
- n += found;
- c = n;
+ c = n + found;
- if (found && !ldata->push && read_buf(ldata, eol) == __DISABLED_CHAR) {
- n--;
- eof_push = !n && ldata->read_tail != ldata->line_start;
+ if (!found || read_buf(ldata, eol) != __DISABLED_CHAR) {
+ c = min(*nr, c);
+ n = c;
}
n_tty_trace("%s: eol:%zu found:%d n:%zu c:%zu size:%zu more:%zu\n",
@@ -2116,7 +2115,7 @@ static int canon_copy_from_read_buf(struct tty_struct *tty,
ldata->push = 0;
tty_audit_push(tty);
}
- return eof_push ? -EAGAIN : 0;
+ return 0;
}
extern ssize_t redirected_tty_write(struct file *, const char __user *,
@@ -2273,10 +2272,7 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
if (ldata->icanon && !L_EXTPROC(tty)) {
retval = canon_copy_from_read_buf(tty, &b, &nr);
- if (retval == -EAGAIN) {
- retval = 0;
- continue;
- } else if (retval)
+ if (retval)
break;
} else {
int uncopied;
diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c
index c0533a57ec53..910bfee5a88b 100644
--- a/drivers/tty/serial/8250/8250_fsl.c
+++ b/drivers/tty/serial/8250/8250_fsl.c
@@ -60,3 +60,4 @@ int fsl8250_handle_irq(struct uart_port *port)
spin_unlock_irqrestore(&up->port.lock, flags);
return 1;
}
+EXPORT_SYMBOL_GPL(fsl8250_handle_irq);
diff --git a/drivers/tty/serial/8250/8250_uniphier.c b/drivers/tty/serial/8250/8250_uniphier.c
index d11621e2cf1d..245edbb68d4b 100644
--- a/drivers/tty/serial/8250/8250_uniphier.c
+++ b/drivers/tty/serial/8250/8250_uniphier.c
@@ -115,12 +115,16 @@ static void uniphier_serial_out(struct uart_port *p, int offset, int value)
*/
static int uniphier_serial_dl_read(struct uart_8250_port *up)
{
- return readl(up->port.membase + UNIPHIER_UART_DLR);
+ int offset = UNIPHIER_UART_DLR << up->port.regshift;
+
+ return readl(up->port.membase + offset);
}
static void uniphier_serial_dl_write(struct uart_8250_port *up, int value)
{
- writel(value, up->port.membase + UNIPHIER_UART_DLR);
+ int offset = UNIPHIER_UART_DLR << up->port.regshift;
+
+ writel(value, up->port.membase + offset);
}
static int uniphier_of_serial_setup(struct device *dev, struct uart_port *port,
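A quick illustration of the fix above: on a bus with a register stride (regshift), every register offset — including the divisor latch — must be scaled the same way. Plain C with made-up values:

#include <stdio.h>

#define UART_DLR 0x9	/* illustrative register index, not the real one */

int main(void)
{
	unsigned regshift = 2;			/* 4-byte register stride */
	unsigned offset = UART_DLR << regshift;

	printf("DLR at byte offset 0x%x\n", offset);	/* prints 0x24 */
	return 0;
}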
diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig
index e6f5e12a2d83..6412f1455beb 100644
--- a/drivers/tty/serial/8250/Kconfig
+++ b/drivers/tty/serial/8250/Kconfig
@@ -373,6 +373,7 @@ config SERIAL_8250_MID
depends on SERIAL_8250 && PCI
select HSU_DMA if SERIAL_8250_DMA
select HSU_DMA_PCI if X86_INTEL_MID
+ select RATIONAL
help
Selecting this option will enable handling of the extra features
present on the UART found on Intel Medfield SOC and various other
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 1aec4404062d..f38beb28e7ae 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1539,7 +1539,6 @@ config SERIAL_FSL_LPUART
tristate "Freescale lpuart serial port support"
depends on HAS_DMA
select SERIAL_CORE
- select SERIAL_EARLYCON
help
Support for the on-chip lpuart on some Freescale SOCs.
@@ -1547,6 +1546,7 @@ config SERIAL_FSL_LPUART_CONSOLE
bool "Console on Freescale lpuart serial port"
depends on SERIAL_FSL_LPUART=y
select SERIAL_CORE_CONSOLE
+ select SERIAL_EARLYCON
help
If you have enabled the lpuart serial port on the Freescale SoCs,
you can make it the console by answering Y to this option.
diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c
index 681e0f3d5e0e..a1c0a89d9c7f 100644
--- a/drivers/tty/serial/bcm63xx_uart.c
+++ b/drivers/tty/serial/bcm63xx_uart.c
@@ -474,7 +474,7 @@ static int bcm_uart_startup(struct uart_port *port)
/* register irq and enable rx interrupts */
ret = request_irq(port->irq, bcm_uart_interrupt, 0,
- bcm_uart_type(port), port);
+ dev_name(port->dev), port);
if (ret)
return ret;
bcm_uart_writel(port, UART_RX_INT_MASK, UART_IR_REG);
diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c
index f09636083426..b5b2f2be6be7 100644
--- a/drivers/tty/serial/earlycon.c
+++ b/drivers/tty/serial/earlycon.c
@@ -115,6 +115,7 @@ static int __init register_earlycon(char *buf, const struct earlycon_id *match)
if (buf && !parse_options(&early_console_dev, buf))
buf = NULL;
+ spin_lock_init(&port->lock);
port->uartclk = BASE_BAUD * 16;
if (port->mapbase)
port->membase = earlycon_map(port->mapbase, 64);
@@ -202,6 +203,7 @@ int __init of_setup_earlycon(unsigned long addr,
int err;
struct uart_port *port = &early_console_dev.port;
+ spin_lock_init(&port->lock);
port->iotype = UPIO_MEM;
port->mapbase = addr;
port->uartclk = BASE_BAUD * 16;
diff --git a/drivers/tty/serial/etraxfs-uart.c b/drivers/tty/serial/etraxfs-uart.c
index 6813e316e9ff..2f80bc7e44fb 100644
--- a/drivers/tty/serial/etraxfs-uart.c
+++ b/drivers/tty/serial/etraxfs-uart.c
@@ -894,7 +894,7 @@ static int etraxfs_uart_probe(struct platform_device *pdev)
up->regi_ser = of_iomap(np, 0);
up->port.dev = &pdev->dev;
- up->gpios = mctrl_gpio_init(&pdev->dev, 0);
+ up->gpios = mctrl_gpio_init_noauto(&pdev->dev, 0);
if (IS_ERR(up->gpios))
return PTR_ERR(up->gpios);
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 960e50a97558..51c7507b0444 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -1437,7 +1437,7 @@ static void sci_request_dma(struct uart_port *port)
sg_init_table(sg, 1);
s->rx_buf[i] = buf;
sg_dma_address(sg) = dma;
- sg->length = s->buf_len_rx;
+ sg_dma_len(sg) = s->buf_len_rx;
buf += s->buf_len_rx;
dma += s->buf_len_rx;
diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index 064031870ba0..ca0d3802f2af 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -148,8 +148,10 @@ static int receive_chars_read(struct uart_port *port)
uart_handle_dcd_change(port, 1);
}
- for (i = 0; i < bytes_read; i++)
- uart_handle_sysrq_char(port, con_read_page[i]);
+ if (port->sysrq != 0 && *con_read_page) {
+ for (i = 0; i < bytes_read; i++)
+ uart_handle_sysrq_char(port, con_read_page[i]);
+ }
if (port->state == NULL)
continue;
@@ -168,17 +170,17 @@ struct sunhv_ops {
int (*receive_chars)(struct uart_port *port);
};
-static struct sunhv_ops bychar_ops = {
+static const struct sunhv_ops bychar_ops = {
.transmit_chars = transmit_chars_putchar,
.receive_chars = receive_chars_getchar,
};
-static struct sunhv_ops bywrite_ops = {
+static const struct sunhv_ops bywrite_ops = {
.transmit_chars = transmit_chars_write,
.receive_chars = receive_chars_read,
};
-static struct sunhv_ops *sunhv_ops = &bychar_ops;
+static const struct sunhv_ops *sunhv_ops = &bychar_ops;
static struct tty_port *receive_chars(struct uart_port *port)
{
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 5381a728d23e..e5139402e7f8 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -133,6 +133,12 @@ static void sysrq_handle_crash(int key)
{
char *killer = NULL;
+ /* we need to release the RCU read lock here,
+ * otherwise we get an annoying
+ * 'BUG: sleeping function called from invalid context'
+ * complaint from the kernel before the panic.
+ */
+ rcu_read_unlock();
panic_on_oops = 1; /* force panic */
wmb();
*killer = 1;
diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c
index 90ca082935f6..3d245cd3d8e6 100644
--- a/drivers/tty/tty_audit.c
+++ b/drivers/tty/tty_audit.c
@@ -265,7 +265,7 @@ static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty,
*
* Audit @data of @size from @tty, if necessary.
*/
-void tty_audit_add_data(struct tty_struct *tty, unsigned char *data,
+void tty_audit_add_data(struct tty_struct *tty, const void *data,
size_t size, unsigned icanon)
{
struct tty_audit_buf *buf;
diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index 9a479e61791a..3cd31e0d4bd9 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -450,7 +450,7 @@ receive_buf(struct tty_struct *tty, struct tty_buffer *head, int count)
count = disc->ops->receive_buf2(tty, p, f, count);
else {
count = min_t(int, count, tty->receive_room);
- if (count)
+ if (count && disc->ops->receive_buf)
disc->ops->receive_buf(tty, p, f, count);
}
return count;
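The tty_buffer hunk above guards an optional callback: a line discipline need not implement receive_buf, so the pointer must be checked before the call. A tiny runnable demonstration of the same guard (names invented for the demo):

#include <stdio.h>

struct ldisc_ops { void (*receive_buf)(const char *p, int n); };

static void print_buf(const char *p, int n) { printf("%.*s\n", n, p); }

/* call the hook only when the ldisc actually implements it */
static void deliver(const struct ldisc_ops *o, const char *p, int n)
{
	if (n && o->receive_buf)
		o->receive_buf(p, n);
}

int main(void)
{
	struct ldisc_ops with = { .receive_buf = print_buf };
	struct ldisc_ops without = { 0 };

	deliver(&with, "data", 4);	/* prints "data" */
	deliver(&without, "data", 4);	/* safely does nothing */
	return 0;
}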
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 0c41dbcb90b8..bcc8e1e8bb72 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -1282,18 +1282,22 @@ int tty_send_xchar(struct tty_struct *tty, char ch)
int was_stopped = tty->stopped;
if (tty->ops->send_xchar) {
+ down_read(&tty->termios_rwsem);
tty->ops->send_xchar(tty, ch);
+ up_read(&tty->termios_rwsem);
return 0;
}
if (tty_write_lock(tty, 0) < 0)
return -ERESTARTSYS;
+ down_read(&tty->termios_rwsem);
if (was_stopped)
start_tty(tty);
tty->ops->write(tty, &ch, 1);
if (was_stopped)
stop_tty(tty);
+ up_read(&tty->termios_rwsem);
tty_write_unlock(tty);
return 0;
}
diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c
index 9c5aebfe7053..1445dd39aa62 100644
--- a/drivers/tty/tty_ioctl.c
+++ b/drivers/tty/tty_ioctl.c
@@ -1147,16 +1147,12 @@ int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file,
spin_unlock_irq(&tty->flow_lock);
break;
case TCIOFF:
- down_read(&tty->termios_rwsem);
if (STOP_CHAR(tty) != __DISABLED_CHAR)
retval = tty_send_xchar(tty, STOP_CHAR(tty));
- up_read(&tty->termios_rwsem);
break;
case TCION:
- down_read(&tty->termios_rwsem);
if (START_CHAR(tty) != __DISABLED_CHAR)
retval = tty_send_xchar(tty, START_CHAR(tty));
- up_read(&tty->termios_rwsem);
break;
default:
return -EINVAL;
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index 5af8f1874c1a..629e3c865072 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -592,7 +592,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
/* Restart the work queue in case no characters kick it off. Safe if
already running */
- schedule_work(&tty->port->buf.work);
+ tty_buffer_restart_work(tty->port);
tty_unlock(tty);
return retval;
diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c
index 6ccbf60cdd5c..5a048b7b92e8 100644
--- a/drivers/usb/chipidea/ci_hdrc_imx.c
+++ b/drivers/usb/chipidea/ci_hdrc_imx.c
@@ -84,6 +84,12 @@ struct ci_hdrc_imx_data {
struct imx_usbmisc_data *usbmisc_data;
bool supports_runtime_pm;
bool in_lpm;
+ /* SoCs before i.MX6 (except i.MX23/i.MX28) need three clocks */
+ bool need_three_clks;
+ struct clk *clk_ipg;
+ struct clk *clk_ahb;
+ struct clk *clk_per;
+ /* --------------------------------- */
};
/* Common functions shared by usbmisc drivers */
@@ -135,6 +141,102 @@ static struct imx_usbmisc_data *usbmisc_get_init_data(struct device *dev)
}
/* End of common functions shared by usbmisc drivers*/
+static int imx_get_clks(struct device *dev)
+{
+ struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
+ int ret = 0;
+
+ data->clk_ipg = devm_clk_get(dev, "ipg");
+ if (IS_ERR(data->clk_ipg)) {
+ /* The platform only needs one clock */
+ data->clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(data->clk)) {
+ ret = PTR_ERR(data->clk);
+ dev_err(dev,
+ "Failed to get clks, err=%ld,%ld\n",
+ PTR_ERR(data->clk), PTR_ERR(data->clk_ipg));
+ return ret;
+ }
+ return ret;
+ }
+
+ data->clk_ahb = devm_clk_get(dev, "ahb");
+ if (IS_ERR(data->clk_ahb)) {
+ ret = PTR_ERR(data->clk_ahb);
+ dev_err(dev,
+ "Failed to get ahb clock, err=%d\n", ret);
+ return ret;
+ }
+
+ data->clk_per = devm_clk_get(dev, "per");
+ if (IS_ERR(data->clk_per)) {
+ ret = PTR_ERR(data->clk_per);
+ dev_err(dev,
+ "Failed to get per clock, err=%d\n", ret);
+ return ret;
+ }
+
+ data->need_three_clks = true;
+ return ret;
+}
+
+static int imx_prepare_enable_clks(struct device *dev)
+{
+ struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
+ int ret = 0;
+
+ if (data->need_three_clks) {
+ ret = clk_prepare_enable(data->clk_ipg);
+ if (ret) {
+ dev_err(dev,
+ "Failed to prepare/enable ipg clk, err=%d\n",
+ ret);
+ return ret;
+ }
+
+ ret = clk_prepare_enable(data->clk_ahb);
+ if (ret) {
+ dev_err(dev,
+ "Failed to prepare/enable ahb clk, err=%d\n",
+ ret);
+ clk_disable_unprepare(data->clk_ipg);
+ return ret;
+ }
+
+ ret = clk_prepare_enable(data->clk_per);
+ if (ret) {
+ dev_err(dev,
+ "Failed to prepare/enable per clk, err=%d\n",
+ ret);
+ clk_disable_unprepare(data->clk_ahb);
+ clk_disable_unprepare(data->clk_ipg);
+ return ret;
+ }
+ } else {
+ ret = clk_prepare_enable(data->clk);
+ if (ret) {
+ dev_err(dev,
+ "Failed to prepare/enable clk, err=%d\n",
+ ret);
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+static void imx_disable_unprepare_clks(struct device *dev)
+{
+ struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
+
+ if (data->need_three_clks) {
+ clk_disable_unprepare(data->clk_per);
+ clk_disable_unprepare(data->clk_ahb);
+ clk_disable_unprepare(data->clk_ipg);
+ } else {
+ clk_disable_unprepare(data->clk);
+ }
+}
static int ci_hdrc_imx_probe(struct platform_device *pdev)
{
@@ -145,31 +247,31 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev)
.flags = CI_HDRC_SET_NON_ZERO_TTHA,
};
int ret;
- const struct of_device_id *of_id =
- of_match_device(ci_hdrc_imx_dt_ids, &pdev->dev);
- const struct ci_hdrc_imx_platform_flag *imx_platform_flag = of_id->data;
+ const struct of_device_id *of_id;
+ const struct ci_hdrc_imx_platform_flag *imx_platform_flag;
+
+ of_id = of_match_device(ci_hdrc_imx_dt_ids, &pdev->dev);
+ if (!of_id)
+ return -ENODEV;
+
+ imx_platform_flag = of_id->data;
data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
+ platform_set_drvdata(pdev, data);
data->usbmisc_data = usbmisc_get_init_data(&pdev->dev);
if (IS_ERR(data->usbmisc_data))
return PTR_ERR(data->usbmisc_data);
- data->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(data->clk)) {
- dev_err(&pdev->dev,
- "Failed to get clock, err=%ld\n", PTR_ERR(data->clk));
- return PTR_ERR(data->clk);
- }
+ ret = imx_get_clks(&pdev->dev);
+ if (ret)
+ return ret;
- ret = clk_prepare_enable(data->clk);
- if (ret) {
- dev_err(&pdev->dev,
- "Failed to prepare or enable clock, err=%d\n", ret);
+ ret = imx_prepare_enable_clks(&pdev->dev);
+ if (ret)
return ret;
- }
data->phy = devm_usb_get_phy_by_phandle(&pdev->dev, "fsl,usbphy", 0);
if (IS_ERR(data->phy)) {
@@ -212,8 +314,6 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev)
goto disable_device;
}
- platform_set_drvdata(pdev, data);
-
if (data->supports_runtime_pm) {
pm_runtime_set_active(&pdev->dev);
pm_runtime_enable(&pdev->dev);
@@ -226,7 +326,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev)
disable_device:
ci_hdrc_remove_device(data->ci_pdev);
err_clk:
- clk_disable_unprepare(data->clk);
+ imx_disable_unprepare_clks(&pdev->dev);
return ret;
}
@@ -240,7 +340,7 @@ static int ci_hdrc_imx_remove(struct platform_device *pdev)
pm_runtime_put_noidle(&pdev->dev);
}
ci_hdrc_remove_device(data->ci_pdev);
- clk_disable_unprepare(data->clk);
+ imx_disable_unprepare_clks(&pdev->dev);
return 0;
}
@@ -252,7 +352,7 @@ static int imx_controller_suspend(struct device *dev)
dev_dbg(dev, "at %s\n", __func__);
- clk_disable_unprepare(data->clk);
+ imx_disable_unprepare_clks(dev);
data->in_lpm = true;
return 0;
@@ -270,7 +370,7 @@ static int imx_controller_resume(struct device *dev)
return 0;
}
- ret = clk_prepare_enable(data->clk);
+ ret = imx_prepare_enable_clks(dev);
if (ret)
return ret;
@@ -285,7 +385,7 @@ static int imx_controller_resume(struct device *dev)
return 0;
clk_disable:
- clk_disable_unprepare(data->clk);
+ imx_disable_unprepare_clks(dev);
return ret;
}
diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c
index 080b7be3daf0..58c8485a0715 100644
--- a/drivers/usb/chipidea/debug.c
+++ b/drivers/usb/chipidea/debug.c
@@ -322,8 +322,10 @@ static ssize_t ci_role_write(struct file *file, const char __user *ubuf,
return -EINVAL;
pm_runtime_get_sync(ci->dev);
+ disable_irq(ci->irq);
ci_role_stop(ci);
ret = ci_role_start(ci, role);
+ enable_irq(ci->irq);
pm_runtime_put_sync(ci->dev);
return ret ? ret : count;
diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index 8223fe73ea85..391a1225b0ba 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -1751,6 +1751,22 @@ static int ci_udc_start(struct usb_gadget *gadget,
return retval;
}
+static void ci_udc_stop_for_otg_fsm(struct ci_hdrc *ci)
+{
+ if (!ci_otg_is_fsm_mode(ci))
+ return;
+
+ mutex_lock(&ci->fsm.lock);
+ if (ci->fsm.otg->state == OTG_STATE_A_PERIPHERAL) {
+ ci->fsm.a_bidl_adis_tmout = 1;
+ ci_hdrc_otg_fsm_start(ci);
+ } else if (ci->fsm.otg->state == OTG_STATE_B_PERIPHERAL) {
+ ci->fsm.protocol = PROTO_UNDEF;
+ ci->fsm.otg->state = OTG_STATE_UNDEFINED;
+ }
+ mutex_unlock(&ci->fsm.lock);
+}
+
/**
* ci_udc_stop: unregister a gadget driver
*/
@@ -1775,6 +1791,7 @@ static int ci_udc_stop(struct usb_gadget *gadget)
ci->driver = NULL;
spin_unlock_irqrestore(&ci->lock, flags);
+ ci_udc_stop_for_otg_fsm(ci);
return 0;
}
diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c
index fcea4eb36eee..ab8b027e8cc8 100644
--- a/drivers/usb/chipidea/usbmisc_imx.c
+++ b/drivers/usb/chipidea/usbmisc_imx.c
@@ -500,7 +500,11 @@ static int usbmisc_imx_probe(struct platform_device *pdev)
{
struct resource *res;
struct imx_usbmisc *data;
- struct of_device_id *tmp_dev;
+ const struct of_device_id *of_id;
+
+ of_id = of_match_device(usbmisc_imx_dt_ids, &pdev->dev);
+ if (!of_id)
+ return -ENODEV;
data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
if (!data)
@@ -513,9 +517,7 @@ static int usbmisc_imx_probe(struct platform_device *pdev)
if (IS_ERR(data->base))
return PTR_ERR(data->base);
- tmp_dev = (struct of_device_id *)
- of_match_device(usbmisc_imx_dt_ids, &pdev->dev);
- data->ops = (const struct usbmisc_ops *)tmp_dev->data;
+ data->ops = (const struct usbmisc_ops *)of_id->data;
platform_set_drvdata(pdev, data);
return 0;
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index b30e7423549b..26ca4f910cb0 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1838,6 +1838,11 @@ static const struct usb_device_id acm_ids[] = {
},
#endif
+ /* Exclude Infineon Flash Loader utility */
+ { USB_DEVICE(0x058b, 0x0041),
+ .driver_info = IGNORE_DEVICE,
+ },
+
/* control interfaces without any protocol set */
{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
USB_CDC_PROTO_NONE) },
diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c
index 433bbc34a8a4..071964c7847f 100644
--- a/drivers/usb/class/usblp.c
+++ b/drivers/usb/class/usblp.c
@@ -884,11 +884,11 @@ static int usblp_wwait(struct usblp *usblp, int nonblock)
add_wait_queue(&usblp->wwait, &waita);
for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
if (mutex_lock_interruptible(&usblp->mut)) {
rc = -EINTR;
break;
}
+ set_current_state(TASK_INTERRUPTIBLE);
rc = usblp_wtest(usblp, nonblock);
mutex_unlock(&usblp->mut);
if (rc <= 0)
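The usblp reordering above fixes the classic lost-wakeup bug: mutex_lock_interruptible() may sleep and must not be entered with the task already in TASK_INTERRUPTIBLE, and the state must be set before the condition is tested, or a wakeup between the two is missed. The canonical loop shape, as a sketch with a hypothetical device, condition, and lock (wait-queue add/remove elided):

static int foo_wait(struct foo_dev *dev)
{
	int ready;

	for (;;) {
		if (mutex_lock_interruptible(&dev->mut))
			return -EINTR;
		set_current_state(TASK_INTERRUPTIBLE);	/* after the lock */
		ready = foo_condition(dev);		/* test under the lock */
		mutex_unlock(&dev->mut);
		if (ready)
			break;
		schedule();				/* sleep until woken */
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}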
diff --git a/drivers/usb/core/Kconfig b/drivers/usb/core/Kconfig
index a99c89e78126..dd280108758f 100644
--- a/drivers/usb/core/Kconfig
+++ b/drivers/usb/core/Kconfig
@@ -77,8 +77,7 @@ config USB_OTG_BLACKLIST_HUB
config USB_OTG_FSM
tristate "USB 2.0 OTG FSM implementation"
- depends on USB
- select USB_OTG
+ depends on USB && USB_OTG
select USB_PHY
help
Implements OTG Finite State Machine as specified in On-The-Go
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 7caff020106e..5050760f5e17 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -115,7 +115,8 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
USB_SS_MULT(desc->bmAttributes) > 3) {
dev_warn(ddev, "Isoc endpoint has Mult of %d in "
"config %d interface %d altsetting %d ep %d: "
- "setting to 3\n", desc->bmAttributes + 1,
+ "setting to 3\n",
+ USB_SS_MULT(desc->bmAttributes),
cfgno, inum, asnum, ep->desc.bEndpointAddress);
ep->ss_ep_comp.bmAttributes = 2;
}
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index bdeadc112d29..ddbf32d599cb 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -124,6 +124,10 @@ struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev)
int usb_device_supports_lpm(struct usb_device *udev)
{
+ /* Some devices have trouble with LPM */
+ if (udev->quirks & USB_QUIRK_NO_LPM)
+ return 0;
+
/* USB 2.1 (and greater) devices indicate LPM support through
* their USB 2.0 Extended Capabilities BOS descriptor.
*/
@@ -1031,10 +1035,20 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
unsigned delay;
/* Continue a partial initialization */
- if (type == HUB_INIT2)
- goto init2;
- if (type == HUB_INIT3)
+ if (type == HUB_INIT2 || type == HUB_INIT3) {
+ device_lock(hub->intfdev);
+
+ /* Was the hub disconnected while we were waiting? */
+ if (hub->disconnected) {
+ device_unlock(hub->intfdev);
+ kref_put(&hub->kref, hub_release);
+ return;
+ }
+ if (type == HUB_INIT2)
+ goto init2;
goto init3;
+ }
+ kref_get(&hub->kref);
/* The superspeed hub except for root hub has to use Hub Depth
* value as an offset into the route string to locate the bits
@@ -1232,6 +1246,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
queue_delayed_work(system_power_efficient_wq,
&hub->init_work,
msecs_to_jiffies(delay));
+ device_unlock(hub->intfdev);
return; /* Continues at init3: below */
} else {
msleep(delay);
@@ -1253,6 +1268,11 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
/* Allow autosuspend if it was suppressed */
if (type <= HUB_INIT3)
usb_autopm_put_interface_async(to_usb_interface(hub->intfdev));
+
+ if (type == HUB_INIT2 || type == HUB_INIT3)
+ device_unlock(hub->intfdev);
+
+ kref_put(&hub->kref, hub_release);
}
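The shape of the hub_activate fix above: a reference pins the hub across the delayed-work continuations, and each continuation re-checks for disconnect under the device lock before proceeding. A condensed sketch with hypothetical structure and field names:

static void stage_continue(struct hub *hub)
{
	device_lock(hub->dev);
	if (hub->disconnected) {	/* raced with disconnect? */
		device_unlock(hub->dev);
		kref_put(&hub->kref, hub_release);
		return;
	}
	/* ... continue the deferred initialization ... */
	device_unlock(hub->dev);
	kref_put(&hub->kref, hub_release);
}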
/* Implement the continuations for the delays above */
@@ -4512,6 +4532,8 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
goto fail;
}
+ usb_detect_quirks(udev);
+
if (udev->wusb == 0 && le16_to_cpu(udev->descriptor.bcdUSB) >= 0x0201) {
retval = usb_get_bos_descriptor(udev);
if (!retval) {
@@ -4710,7 +4732,6 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
if (status < 0)
goto loop;
- usb_detect_quirks(udev);
if (udev->quirks & USB_QUIRK_DELAY_INIT)
msleep(1000);
@@ -5326,9 +5347,6 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
if (udev->usb2_hw_lpm_enabled == 1)
usb_set_usb2_hardware_lpm(udev, 0);
- bos = udev->bos;
- udev->bos = NULL;
-
/* Disable LPM and LTM while we reset the device and reinstall the alt
* settings. Device-initiated LPM settings, and system exit latency
* settings are cleared when the device is reset, so we have to set
@@ -5337,15 +5355,18 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
ret = usb_unlocked_disable_lpm(udev);
if (ret) {
dev_err(&udev->dev, "%s Failed to disable LPM\n.", __func__);
- goto re_enumerate;
+ goto re_enumerate_no_bos;
}
ret = usb_disable_ltm(udev);
if (ret) {
dev_err(&udev->dev, "%s Failed to disable LTM\n.",
__func__);
- goto re_enumerate;
+ goto re_enumerate_no_bos;
}
+ bos = udev->bos;
+ udev->bos = NULL;
+
for (i = 0; i < SET_CONFIG_TRIES; ++i) {
/* ep0 maxpacket size may change; let the HCD know about it.
@@ -5442,10 +5463,11 @@ done:
return 0;
re_enumerate:
- /* LPM state doesn't matter when we're about to destroy the device. */
- hub_port_logical_disconnect(parent_hub, port1);
usb_release_bos_descriptor(udev);
udev->bos = bos;
+re_enumerate_no_bos:
+ /* LPM state doesn't matter when we're about to destroy the device. */
+ hub_port_logical_disconnect(parent_hub, port1);
return -ENODEV;
}
diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index 210618319f10..5487fe308f01 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -206,7 +206,7 @@ static int link_peers(struct usb_port *left, struct usb_port *right)
else
method = "default";
- pr_warn("usb: failed to peer %s and %s by %s (%s:%s) (%s:%s)\n",
+ pr_debug("usb: failed to peer %s and %s by %s (%s:%s) (%s:%s)\n",
dev_name(&left->dev), dev_name(&right->dev), method,
dev_name(&left->dev),
lpeer ? dev_name(&lpeer->dev) : "none",
@@ -265,7 +265,7 @@ static void link_peers_report(struct usb_port *left, struct usb_port *right)
if (rc == 0) {
dev_dbg(&left->dev, "peered to %s\n", dev_name(&right->dev));
} else {
- dev_warn(&left->dev, "failed to peer to %s (%d)\n",
+ dev_dbg(&left->dev, "failed to peer to %s (%d)\n",
dev_name(&right->dev), rc);
pr_warn_once("usb: port power management may be unreliable\n");
usb_port_block_power_off = 1;
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index f5a381945db2..6dc810bce295 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -125,6 +125,9 @@ static const struct usb_device_id usb_quirk_list[] = {
{ USB_DEVICE(0x04f3, 0x016f), .driver_info =
USB_QUIRK_DEVICE_QUALIFIER },
+ { USB_DEVICE(0x04f3, 0x21b8), .driver_info =
+ USB_QUIRK_DEVICE_QUALIFIER },
+
/* Roland SC-8820 */
{ USB_DEVICE(0x0582, 0x0007), .driver_info = USB_QUIRK_RESET_RESUME },
@@ -199,6 +202,12 @@ static const struct usb_device_id usb_quirk_list[] = {
{ USB_DEVICE(0x1a0a, 0x0200), .driver_info =
USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
+ /* Blackmagic Design Intensity Shuttle */
+ { USB_DEVICE(0x1edb, 0xbd3b), .driver_info = USB_QUIRK_NO_LPM },
+
+ /* Blackmagic Design UltraStudio SDI */
+ { USB_DEVICE(0x1edb, 0xbd4f), .driver_info = USB_QUIRK_NO_LPM },
+
{ } /* terminating entry must be last */
};
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index e79baf73c234..571c21727ff9 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -324,12 +324,13 @@ void dwc2_hcd_disconnect(struct dwc2_hsotg *hsotg)
*/
static void dwc2_hcd_rem_wakeup(struct dwc2_hsotg *hsotg)
{
- if (hsotg->lx_state == DWC2_L2) {
+ if (hsotg->bus_suspended) {
hsotg->flags.b.port_suspend_change = 1;
usb_hcd_resume_root_hub(hsotg->priv);
- } else {
- hsotg->flags.b.port_l1_change = 1;
}
+
+ if (hsotg->lx_state == DWC2_L1)
+ hsotg->flags.b.port_l1_change = 1;
}
/**
@@ -1428,8 +1429,8 @@ static void dwc2_wakeup_detected(unsigned long data)
dev_dbg(hsotg->dev, "Clear Resume: HPRT0=%0x\n",
dwc2_readl(hsotg->regs + HPRT0));
- hsotg->bus_suspended = 0;
dwc2_hcd_rem_wakeup(hsotg);
+ hsotg->bus_suspended = 0;
/* Change to L0 state */
hsotg->lx_state = DWC2_L0;
diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index 5859b0fa19ee..39c1cbf0e75d 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -108,7 +108,8 @@ static const struct dwc2_core_params params_rk3066 = {
.host_ls_low_power_phy_clk = -1,
.ts_dline = -1,
.reload_ctl = -1,
- .ahbcfg = 0x7, /* INCR16 */
+ .ahbcfg = GAHBCFG_HBSTLEN_INCR16 <<
+ GAHBCFG_HBSTLEN_SHIFT,
.uframe_sched = -1,
.external_id_pin_ctl = -1,
.hibernation = -1,
@@ -124,9 +125,11 @@ static int __dwc2_lowlevel_hw_enable(struct dwc2_hsotg *hsotg)
if (ret)
return ret;
- ret = clk_prepare_enable(hsotg->clk);
- if (ret)
- return ret;
+ if (hsotg->clk) {
+ ret = clk_prepare_enable(hsotg->clk);
+ if (ret)
+ return ret;
+ }
if (hsotg->uphy)
ret = usb_phy_init(hsotg->uphy);
@@ -174,7 +177,8 @@ static int __dwc2_lowlevel_hw_disable(struct dwc2_hsotg *hsotg)
if (ret)
return ret;
- clk_disable_unprepare(hsotg->clk);
+ if (hsotg->clk)
+ clk_disable_unprepare(hsotg->clk);
ret = regulator_bulk_disable(ARRAY_SIZE(hsotg->supplies),
hsotg->supplies);
@@ -211,14 +215,41 @@ static int dwc2_lowlevel_hw_init(struct dwc2_hsotg *hsotg)
*/
hsotg->phy = devm_phy_get(hsotg->dev, "usb2-phy");
if (IS_ERR(hsotg->phy)) {
- hsotg->phy = NULL;
+ ret = PTR_ERR(hsotg->phy);
+ switch (ret) {
+ case -ENODEV:
+ case -ENOSYS:
+ hsotg->phy = NULL;
+ break;
+ case -EPROBE_DEFER:
+ return ret;
+ default:
+ dev_err(hsotg->dev, "error getting phy %d\n", ret);
+ return ret;
+ }
+ }
+
+ if (!hsotg->phy) {
hsotg->uphy = devm_usb_get_phy(hsotg->dev, USB_PHY_TYPE_USB2);
- if (IS_ERR(hsotg->uphy))
- hsotg->uphy = NULL;
- else
- hsotg->plat = dev_get_platdata(hsotg->dev);
+ if (IS_ERR(hsotg->uphy)) {
+ ret = PTR_ERR(hsotg->uphy);
+ switch (ret) {
+ case -ENODEV:
+ case -ENXIO:
+ hsotg->uphy = NULL;
+ break;
+ case -EPROBE_DEFER:
+ return ret;
+ default:
+ dev_err(hsotg->dev, "error getting usb phy %d\n",
+ ret);
+ return ret;
+ }
+ }
}
+ hsotg->plat = dev_get_platdata(hsotg->dev);
+
if (hsotg->phy) {
/*
* If using the generic PHY framework, check if the PHY bus
@@ -228,11 +259,6 @@ static int dwc2_lowlevel_hw_init(struct dwc2_hsotg *hsotg)
hsotg->phyif = GUSBCFG_PHYIF8;
}
- if (!hsotg->phy && !hsotg->uphy && !hsotg->plat) {
- dev_err(hsotg->dev, "no platform data or transceiver defined\n");
- return -EPROBE_DEFER;
- }
-
/* Clock */
hsotg->clk = devm_clk_get(hsotg->dev, "otg");
if (IS_ERR(hsotg->clk)) {
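The PHY hunks above introduce the optional-resource idiom: treat "not present" error codes as absence, propagate -EPROBE_DEFER so probing retries once the provider appears, and fail hard on anything else. A sketch under that reading, with hypothetical struct and field names:

static int foo_get_optional_phy(struct foo *f)
{
	f->phy = devm_phy_get(f->dev, "usb2-phy");
	if (IS_ERR(f->phy)) {
		int ret = PTR_ERR(f->phy);

		switch (ret) {
		case -ENODEV:
		case -ENOSYS:
			f->phy = NULL;	/* optional: carry on without it */
			break;
		case -EPROBE_DEFER:
			return ret;	/* provider not ready yet */
		default:
			dev_err(f->dev, "error getting phy %d\n", ret);
			return ret;
		}
	}
	return 0;
}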
@@ -341,20 +367,6 @@ static int dwc2_driver_probe(struct platform_device *dev)
if (retval)
return retval;
- irq = platform_get_irq(dev, 0);
- if (irq < 0) {
- dev_err(&dev->dev, "missing IRQ resource\n");
- return irq;
- }
-
- dev_dbg(hsotg->dev, "registering common handler for irq%d\n",
- irq);
- retval = devm_request_irq(hsotg->dev, irq,
- dwc2_handle_common_intr, IRQF_SHARED,
- dev_name(hsotg->dev), hsotg);
- if (retval)
- return retval;
-
res = platform_get_resource(dev, IORESOURCE_MEM, 0);
hsotg->regs = devm_ioremap_resource(&dev->dev, res);
if (IS_ERR(hsotg->regs))
@@ -389,6 +401,20 @@ static int dwc2_driver_probe(struct platform_device *dev)
dwc2_set_all_params(hsotg->core_params, -1);
+ irq = platform_get_irq(dev, 0);
+ if (irq < 0) {
+ dev_err(&dev->dev, "missing IRQ resource\n");
+ return irq;
+ }
+
+ dev_dbg(hsotg->dev, "registering common handler for irq%d\n",
+ irq);
+ retval = devm_request_irq(hsotg->dev, irq,
+ dwc2_handle_common_intr, IRQF_SHARED,
+ dev_name(hsotg->dev), hsotg);
+ if (retval)
+ return retval;
+
retval = dwc2_lowlevel_hw_enable(hsotg);
if (retval)
return retval;
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 77a622cb48ab..009d83048c8c 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -34,6 +34,8 @@
#define PCI_DEVICE_ID_INTEL_BSW 0x22b7
#define PCI_DEVICE_ID_INTEL_SPTLP 0x9d30
#define PCI_DEVICE_ID_INTEL_SPTH 0xa130
+#define PCI_DEVICE_ID_INTEL_BXT 0x0aaa
+#define PCI_DEVICE_ID_INTEL_APL 0x5aaa
static const struct acpi_gpio_params reset_gpios = { 0, 0, false };
static const struct acpi_gpio_params cs_gpios = { 1, 0, false };
@@ -210,6 +212,8 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRFLD), },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SPTLP), },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SPTH), },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT), },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), },
{ } /* Terminating Entry */
};
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 55ba447fdf8b..a58376fd65fe 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1078,6 +1078,7 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
* little bit faster.
*/
if (!usb_endpoint_xfer_isoc(dep->endpoint.desc) &&
+ !usb_endpoint_xfer_int(dep->endpoint.desc) &&
!(dep->flags & DWC3_EP_BUSY)) {
ret = __dwc3_gadget_kick_transfer(dep, 0, true);
goto out;
@@ -2744,12 +2745,34 @@ int dwc3_gadget_init(struct dwc3 *dwc)
}
dwc->gadget.ops = &dwc3_gadget_ops;
- dwc->gadget.max_speed = USB_SPEED_SUPER;
dwc->gadget.speed = USB_SPEED_UNKNOWN;
dwc->gadget.sg_supported = true;
dwc->gadget.name = "dwc3-gadget";
/*
+ * FIXME We might be setting max_speed to <SUPER, however versions
+ * <2.20a of dwc3 have an issue with metastability (documented
+ * elsewhere in this driver) which tells us we can't set max speed to
+ * anything lower than SUPER.
+ *
+ * Because gadget.max_speed is only used by composite.c and function
+ * drivers (i.e. it won't go into dwc3's registers) we are allowing this
+ * to happen so we avoid sending SuperSpeed Capability descriptor
+ * together with our BOS descriptor as that could confuse host into
+ * thinking we can handle super speed.
+ *
+ * Note that, in fact, we won't even support GetBOS requests when speed
+ * is less than super speed because we don't have means, yet, to tell
+ * composite.c that we are USB 2.0 + LPM ECN.
+ */
+ if (dwc->revision < DWC3_REVISION_220A)
+ dwc3_trace(trace_dwc3_gadget,
+ "Changing max_speed on rev %08x\n",
+ dwc->revision);
+
+ dwc->gadget.max_speed = dwc->maximum_speed;
+
+ /*
* Per databook, DWC3 needs buffer size to be aligned to MaxPacketSize
* on ep out.
*/
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index adc6d52efa46..cf43e9e18368 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -423,7 +423,7 @@ static ssize_t __ffs_ep0_read_events(struct ffs_data *ffs, char __user *buf,
spin_unlock_irq(&ffs->ev.waitq.lock);
mutex_unlock(&ffs->mutex);
- return unlikely(__copy_to_user(buf, events, size)) ? -EFAULT : size;
+ return unlikely(copy_to_user(buf, events, size)) ? -EFAULT : size;
}
static ssize_t ffs_ep0_read(struct file *file, char __user *buf,
@@ -513,7 +513,7 @@ static ssize_t ffs_ep0_read(struct file *file, char __user *buf,
/* unlocks spinlock */
ret = __ffs_ep0_queue_wait(ffs, data, len);
- if (likely(ret > 0) && unlikely(__copy_to_user(buf, data, len)))
+ if (likely(ret > 0) && unlikely(copy_to_user(buf, data, len)))
ret = -EFAULT;
goto done_mutex;
@@ -3493,7 +3493,7 @@ static char *ffs_prepare_buffer(const char __user *buf, size_t len)
if (unlikely(!data))
return ERR_PTR(-ENOMEM);
- if (unlikely(__copy_from_user(data, buf, len))) {
+ if (unlikely(copy_from_user(data, buf, len))) {
kfree(data);
return ERR_PTR(-EFAULT);
}
diff --git a/drivers/usb/gadget/function/f_loopback.c b/drivers/usb/gadget/function/f_loopback.c
index 23933bdf2d9d..ddc3aad886b7 100644
--- a/drivers/usb/gadget/function/f_loopback.c
+++ b/drivers/usb/gadget/function/f_loopback.c
@@ -329,7 +329,7 @@ static int alloc_requests(struct usb_composite_dev *cdev,
for (i = 0; i < loop->qlen && result == 0; i++) {
result = -ENOMEM;
- in_req = usb_ep_alloc_request(loop->in_ep, GFP_KERNEL);
+ in_req = usb_ep_alloc_request(loop->in_ep, GFP_ATOMIC);
if (!in_req)
goto fail;
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 42acb45e1ab4..898a570319f1 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -370,6 +370,7 @@ static int f_midi_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
if (err) {
ERROR(midi, "%s queue req: %d\n",
midi->out_ep->name, err);
+ free_ep_req(midi->out_ep, req);
}
}
@@ -545,7 +546,7 @@ static void f_midi_transmit(struct f_midi *midi, struct usb_request *req)
}
}
- if (req->length > 0) {
+ if (req->length > 0 && ep->enabled) {
int err;
err = usb_ep_queue(ep, req, GFP_ATOMIC);
diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c
index 289ebca316d3..ad8c9b05572d 100644
--- a/drivers/usb/gadget/function/uvc_configfs.c
+++ b/drivers/usb/gadget/function/uvc_configfs.c
@@ -20,7 +20,7 @@
#define UVC_ATTR(prefix, cname, aname) \
static struct configfs_attribute prefix##attr_##cname = { \
.ca_name = __stringify(aname), \
- .ca_mode = S_IRUGO, \
+ .ca_mode = S_IRUGO | S_IWUGO, \
.ca_owner = THIS_MODULE, \
.show = prefix##cname##_show, \
.store = prefix##cname##_store, \
diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
index f0f2b066ac08..f92f5aff0dd5 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
@@ -1633,7 +1633,7 @@ static irqreturn_t usba_udc_irq(int irq, void *devid)
spin_lock(&udc->lock);
int_enb = usba_int_enb_get(udc);
- status = usba_readl(udc, INT_STA) & int_enb;
+ status = usba_readl(udc, INT_STA) & (int_enb | USBA_HIGH_SPEED);
DBG(DBG_INT, "irq, status=%#08x\n", status);
if (status & USBA_DET_SUSPEND) {
diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c
index 670ac0b12f00..001a3b74a993 100644
--- a/drivers/usb/gadget/udc/pxa27x_udc.c
+++ b/drivers/usb/gadget/udc/pxa27x_udc.c
@@ -2536,6 +2536,9 @@ static int pxa_udc_suspend(struct platform_device *_dev, pm_message_t state)
udc->pullup_resume = udc->pullup_on;
dplus_pullup(udc, 0);
+ if (udc->driver)
+ udc->driver->disconnect(&udc->gadget);
+
return 0;
}
diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c
index 342ffd140122..8c6e15bd6ff0 100644
--- a/drivers/usb/host/ohci-at91.c
+++ b/drivers/usb/host/ohci-at91.c
@@ -473,6 +473,8 @@ static int ohci_hcd_at91_drv_probe(struct platform_device *pdev)
if (!pdata)
return -ENOMEM;
+ pdev->dev.platform_data = pdata;
+
if (!of_property_read_u32(np, "num-ports", &ports))
pdata->ports = ports;
@@ -483,6 +485,7 @@ static int ohci_hcd_at91_drv_probe(struct platform_device *pdev)
*/
if (i >= pdata->ports) {
pdata->vbus_pin[i] = -EINVAL;
+ pdata->overcurrent_pin[i] = -EINVAL;
continue;
}
@@ -513,10 +516,8 @@ static int ohci_hcd_at91_drv_probe(struct platform_device *pdev)
}
at91_for_each_port(i) {
- if (i >= pdata->ports) {
- pdata->overcurrent_pin[i] = -EINVAL;
- continue;
- }
+ if (i >= pdata->ports)
+ break;
pdata->overcurrent_pin[i] =
of_get_named_gpio_flags(np, "atmel,oc-gpio", i, &flags);
@@ -552,8 +553,6 @@ static int ohci_hcd_at91_drv_probe(struct platform_device *pdev)
}
}
- pdev->dev.platform_data = pdata;
-
device_init_wakeup(&pdev->dev, 1);
return usb_hcd_at91_probe(&ohci_at91_hc_driver, pdev);
}
diff --git a/drivers/usb/host/whci/qset.c b/drivers/usb/host/whci/qset.c
index dc31c425ce01..9f1c0538b211 100644
--- a/drivers/usb/host/whci/qset.c
+++ b/drivers/usb/host/whci/qset.c
@@ -377,6 +377,10 @@ static int qset_fill_page_list(struct whc *whc, struct whc_std *std, gfp_t mem_f
if (std->pl_virt == NULL)
return -ENOMEM;
std->dma_addr = dma_map_single(whc->wusbhc.dev, std->pl_virt, pl_len, DMA_TO_DEVICE);
+ if (dma_mapping_error(whc->wusbhc.dev, std->dma_addr)) {
+ kfree(std->pl_virt);
+ return -EFAULT;
+ }
for (p = 0; p < std->num_pointers; p++) {
std->pl_virt[p].buf_ptr = cpu_to_le64(dma_addr);
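The whci check added above is mandatory in general: dma_map_single() can fail, and its result must be validated with dma_mapping_error() before the address is used or stored. Minimal sketch of the idiom as a helper (buffer and length are the caller's):

static int map_for_device(struct device *dev, void *buf, size_t len,
			  dma_addr_t *out)
{
	dma_addr_t addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

	if (dma_mapping_error(dev, addr))
		return -EFAULT;	/* mapping failed; addr is unusable */
	*out = addr;
	return 0;
}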
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 5d2d7e954bd4..f980c239eded 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -733,8 +733,30 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
if ((raw_port_status & PORT_RESET) ||
!(raw_port_status & PORT_PE))
return 0xffffffff;
- if (time_after_eq(jiffies,
- bus_state->resume_done[wIndex])) {
+ /* did port event handler already start resume timing? */
+ if (!bus_state->resume_done[wIndex]) {
+ /* If not, maybe we are in a host initiated resume? */
+ if (test_bit(wIndex, &bus_state->resuming_ports)) {
+ /* Host initiated resume doesn't time the resume
+ * signalling using resume_done[].
+ * It manually sets RESUME state, sleeps 20ms
+ * and sets U0 state. This should probably be
+ * changed, but not right now.
+ */
+ } else {
+ /* port resume was discovered now and here,
+ * start resume timing
+ */
+ unsigned long timeout = jiffies +
+ msecs_to_jiffies(USB_RESUME_TIMEOUT);
+
+ set_bit(wIndex, &bus_state->resuming_ports);
+ bus_state->resume_done[wIndex] = timeout;
+ mod_timer(&hcd->rh_timer, timeout);
+ }
+ /* Has resume been signalled for USB_RESUME_TIMEOUT yet? */
+ } else if (time_after_eq(jiffies,
+ bus_state->resume_done[wIndex])) {
int time_left;
xhci_dbg(xhci, "Resume USB2 port %d\n",
@@ -775,19 +797,35 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
} else {
/*
* The resume has been signaling for less than
- * 20ms. Report the port status as SUSPEND,
- * let the usbcore check port status again
- * and clear resume signaling later.
+ * USB_RESUME_TIMEOUT. Report the port status as SUSPEND,
+ * let the usbcore check port status again and clear
+ * resume signaling later.
*/
status |= USB_PORT_STAT_SUSPEND;
}
}
- if ((raw_port_status & PORT_PLS_MASK) == XDEV_U0
- && (raw_port_status & PORT_POWER)
- && (bus_state->suspended_ports & (1 << wIndex))) {
- bus_state->suspended_ports &= ~(1 << wIndex);
- if (hcd->speed < HCD_USB3)
- bus_state->port_c_suspend |= 1 << wIndex;
+ /*
+ * Clear stale usb2 resume signalling variables in case port changed
+ * state during resume signalling. For example on error
+ */
+ if ((bus_state->resume_done[wIndex] ||
+ test_bit(wIndex, &bus_state->resuming_ports)) &&
+ (raw_port_status & PORT_PLS_MASK) != XDEV_U3 &&
+ (raw_port_status & PORT_PLS_MASK) != XDEV_RESUME) {
+ bus_state->resume_done[wIndex] = 0;
+ clear_bit(wIndex, &bus_state->resuming_ports);
+ }
+
+
+ if ((raw_port_status & PORT_PLS_MASK) == XDEV_U0 &&
+ (raw_port_status & PORT_POWER)) {
+ if (bus_state->suspended_ports & (1 << wIndex)) {
+ bus_state->suspended_ports &= ~(1 << wIndex);
+ if (hcd->speed < HCD_USB3)
+ bus_state->port_c_suspend |= 1 << wIndex;
+ }
+ bus_state->resume_done[wIndex] = 0;
+ clear_bit(wIndex, &bus_state->resuming_ports);
}
if (raw_port_status & PORT_CONNECT) {
status |= USB_PORT_STAT_CONNECTION;
@@ -1112,6 +1150,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
if ((temp & PORT_PE) == 0)
goto error;
+ set_bit(wIndex, &bus_state->resuming_ports);
xhci_set_link_state(xhci, port_array, wIndex,
XDEV_RESUME);
spin_unlock_irqrestore(&xhci->lock, flags);
@@ -1119,6 +1158,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
spin_lock_irqsave(&xhci->lock, flags);
xhci_set_link_state(xhci, port_array, wIndex,
XDEV_U0);
+ clear_bit(wIndex, &bus_state->resuming_ports);
}
bus_state->port_c_suspend |= 1 << wIndex;
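
Taken together, the xhci-hub.c hunks keep two pieces of per-port state
coherent: resume_done[] (a jiffies deadline) and the resuming_ports
bitmask. A rough sketch of the bookkeeping the patch converges on (a
paraphrase, not the driver's exact code):

    if (!bus_state->resume_done[i]) {
        if (!test_bit(i, &bus_state->resuming_ports)) {
            /* device-initiated resume first noticed here: start timing */
            bus_state->resume_done[i] = jiffies +
                    msecs_to_jiffies(USB_RESUME_TIMEOUT);
            set_bit(i, &bus_state->resuming_ports);
            mod_timer(&hcd->rh_timer, bus_state->resume_done[i]);
        }
        /* else: host-initiated resume, timed by a 20ms sleep elsewhere */
    } else if (time_after_eq(jiffies, bus_state->resume_done[i])) {
        /* USB_RESUME_TIMEOUT has elapsed: complete the transition to U0 */
    }
    /* and whenever the link is in neither U3 nor RESUME, clear both */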
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 17f6897acde2..c62109091d12 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -188,10 +188,14 @@ static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev)
0xb7, 0x0c, 0x34, 0xac, 0x01, 0xe9, 0xbf, 0x45,
0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23,
};
- acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), intel_dsm_uuid, 3, 1, NULL);
+ union acpi_object *obj;
+
+ obj = acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), intel_dsm_uuid, 3, 1,
+ NULL);
+ ACPI_FREE(obj);
}
#else
- static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) { }
+static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) { }
#endif /* CONFIG_ACPI */
/* called during probe() after chip reset completes */
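
The xhci-pci.c hunk fixes a leak: acpi_evaluate_dsm() returns a
caller-owned union acpi_object that must be released with ACPI_FREE()
even when the result is not inspected. The idiom, with hypothetical
handle/uuid/rev/func arguments:

    union acpi_object *obj;

    obj = acpi_evaluate_dsm(handle, uuid, rev, func, NULL);
    ACPI_FREE(obj);    /* caller-owned; freeing a NULL result is a no-op */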
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index fa836251ca21..eeaa6c6bd540 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1583,7 +1583,8 @@ static void handle_port_status(struct xhci_hcd *xhci,
*/
bogus_port_status = true;
goto cleanup;
- } else {
+ } else if (!test_bit(faked_port_index,
+ &bus_state->resuming_ports)) {
xhci_dbg(xhci, "resume HS port %d\n", port_id);
bus_state->resume_done[faked_port_index] = jiffies +
msecs_to_jiffies(USB_RESUME_TIMEOUT);
@@ -3896,28 +3897,6 @@ cleanup:
return ret;
}
-static int ep_ring_is_processing(struct xhci_hcd *xhci,
- int slot_id, unsigned int ep_index)
-{
- struct xhci_virt_device *xdev;
- struct xhci_ring *ep_ring;
- struct xhci_ep_ctx *ep_ctx;
- struct xhci_virt_ep *xep;
- dma_addr_t hw_deq;
-
- xdev = xhci->devs[slot_id];
- xep = &xhci->devs[slot_id]->eps[ep_index];
- ep_ring = xep->ring;
- ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index);
-
- if ((le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK) != EP_STATE_RUNNING)
- return 0;
-
- hw_deq = le64_to_cpu(ep_ctx->deq) & ~EP_CTX_CYCLE_MASK;
- return (hw_deq !=
- xhci_trb_virt_to_dma(ep_ring->enq_seg, ep_ring->enqueue));
-}
-
/*
* Check transfer ring to guarantee there is enough room for the urb.
* Update ISO URB start_frame and interval.
@@ -3983,10 +3962,12 @@ int xhci_queue_isoc_tx_prepare(struct xhci_hcd *xhci, gfp_t mem_flags,
}
/* Calculate the start frame and put it in urb->start_frame. */
- if (HCC_CFC(xhci->hcc_params) &&
- ep_ring_is_processing(xhci, slot_id, ep_index)) {
- urb->start_frame = xep->next_frame_id;
- goto skip_start_over;
+ if (HCC_CFC(xhci->hcc_params) && !list_empty(&ep_ring->td_list)) {
+ if ((le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK) ==
+ EP_STATE_RUNNING) {
+ urb->start_frame = xep->next_frame_id;
+ goto skip_start_over;
+ }
}
start_frame = readl(&xhci->run_regs->microframe_index);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 6e7dc6f93978..3f912705dcef 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -175,6 +175,16 @@ int xhci_reset(struct xhci_hcd *xhci)
command |= CMD_RESET;
writel(command, &xhci->op_regs->command);
+ /* Existing Intel xHCI controllers require a delay of 1 ms
+ * after setting the CMD_RESET bit, and before accessing any
+ * HC registers. This allows the HC to complete the
+ * reset operation and be ready for HC register access.
+ * Without this delay, the subsequent HC register access
+ * may, very rarely, result in a system hang.
+ */
+ if (xhci->quirks & XHCI_INTEL_HOST)
+ udelay(1000);
+
ret = xhci_handshake(&xhci->op_regs->command,
CMD_RESET, 0, 10 * 1000 * 1000);
if (ret)
@@ -4768,8 +4778,16 @@ int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
ctrl_ctx->add_flags |= cpu_to_le32(SLOT_FLAG);
slot_ctx = xhci_get_slot_ctx(xhci, config_cmd->in_ctx);
slot_ctx->dev_info |= cpu_to_le32(DEV_HUB);
+ /*
+ * Refer to section 6.2.2: MTT should be 0 for a full-speed hub,
+ * but it may already be set to 1 when setting up an xHCI
+ * virtual device, so clear it anyway.
+ */
if (tt->multi)
slot_ctx->dev_info |= cpu_to_le32(DEV_MTT);
+ else if (hdev->speed == USB_SPEED_FULL)
+ slot_ctx->dev_info &= cpu_to_le32(~DEV_MTT);
+
if (xhci->hci_version > 0x95) {
xhci_dbg(xhci, "xHCI version %x needs hub "
"TT think time and number of ports\n",
diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig
index 1f2037bbeb0d..45c83baf675d 100644
--- a/drivers/usb/musb/Kconfig
+++ b/drivers/usb/musb/Kconfig
@@ -159,7 +159,7 @@ config USB_TI_CPPI_DMA
config USB_TI_CPPI41_DMA
bool 'TI CPPI 4.1 (AM335x)'
- depends on ARCH_OMAP
+ depends on ARCH_OMAP && DMADEVICES
select TI_CPPI41
config USB_TUSB_OMAP_DMA
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index ba13529cbd52..ee9ff7028b92 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -132,7 +132,7 @@ static inline struct musb *dev_to_musb(struct device *dev)
/*-------------------------------------------------------------------------*/
#ifndef CONFIG_BLACKFIN
-static int musb_ulpi_read(struct usb_phy *phy, u32 offset)
+static int musb_ulpi_read(struct usb_phy *phy, u32 reg)
{
void __iomem *addr = phy->io_priv;
int i = 0;
@@ -151,7 +151,7 @@ static int musb_ulpi_read(struct usb_phy *phy, u32 offset)
* ULPICarKitControlDisableUTMI after clearing POWER_SUSPENDM.
*/
- musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)offset);
+ musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)reg);
musb_writeb(addr, MUSB_ULPI_REG_CONTROL,
MUSB_ULPI_REG_REQ | MUSB_ULPI_RDN_WR);
@@ -176,7 +176,7 @@ out:
return ret;
}
-static int musb_ulpi_write(struct usb_phy *phy, u32 offset, u32 data)
+static int musb_ulpi_write(struct usb_phy *phy, u32 val, u32 reg)
{
void __iomem *addr = phy->io_priv;
int i = 0;
@@ -191,8 +191,8 @@ static int musb_ulpi_write(struct usb_phy *phy, u32 offset, u32 data)
power &= ~MUSB_POWER_SUSPENDM;
musb_writeb(addr, MUSB_POWER, power);
- musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)offset);
- musb_writeb(addr, MUSB_ULPI_REG_DATA, (u8)data);
+ musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)reg);
+ musb_writeb(addr, MUSB_ULPI_REG_DATA, (u8)val);
musb_writeb(addr, MUSB_ULPI_REG_CONTROL, MUSB_ULPI_REG_REQ);
while (!(musb_readb(addr, MUSB_ULPI_REG_CONTROL)
@@ -1668,7 +1668,7 @@ EXPORT_SYMBOL_GPL(musb_interrupt);
static bool use_dma = 1;
/* "modprobe ... use_dma=0" etc */
-module_param(use_dma, bool, 0);
+module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "enable/disable use of DMA");
void musb_dma_completion(struct musb *musb, u8 epnum, u8 transmit)
@@ -2017,7 +2017,6 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
/* We need musb_read/write functions initialized for PM */
pm_runtime_use_autosuspend(musb->controller);
pm_runtime_set_autosuspend_delay(musb->controller, 200);
- pm_runtime_irq_safe(musb->controller);
pm_runtime_enable(musb->controller);
/* The musb_platform_init() call:
@@ -2095,6 +2094,7 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
#ifndef CONFIG_MUSB_PIO_ONLY
if (!musb->ops->dma_init || !musb->ops->dma_exit) {
dev_err(dev, "DMA controller not set\n");
+ status = -ENODEV;
goto fail2;
}
musb_dma_controller_create = musb->ops->dma_init;
@@ -2218,6 +2218,12 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
pm_runtime_put(musb->controller);
+ /*
+ * For why this is currently needed, see commit 3e43a0725637
+ * ("usb: musb: core: add pm_runtime_irq_safe()")
+ */
+ pm_runtime_irq_safe(musb->controller);
+
return 0;
fail5:
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index 26c65e66cc0f..795a45b1b25b 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -112,22 +112,32 @@ static void musb_h_tx_flush_fifo(struct musb_hw_ep *ep)
struct musb *musb = ep->musb;
void __iomem *epio = ep->regs;
u16 csr;
- u16 lastcsr = 0;
int retries = 1000;
csr = musb_readw(epio, MUSB_TXCSR);
while (csr & MUSB_TXCSR_FIFONOTEMPTY) {
- if (csr != lastcsr)
- dev_dbg(musb->controller, "Host TX FIFONOTEMPTY csr: %02x\n", csr);
- lastcsr = csr;
csr |= MUSB_TXCSR_FLUSHFIFO | MUSB_TXCSR_TXPKTRDY;
musb_writew(epio, MUSB_TXCSR, csr);
csr = musb_readw(epio, MUSB_TXCSR);
- if (WARN(retries-- < 1,
+
+ /*
+ * FIXME: sometimes the tx fifo flush fails; this has been
+ * observed during device disconnect on AM335x.
+ *
+ * To reproduce the issue, ensure tx urb(s) are queued when
+ * unplugging the usb device that is connected to the AM335x
+ * usb host port.
+ *
+ * Using a usb-ethernet device and running iperf (client on
+ * AM335x) has a very high chance of triggering it.
+ *
+ * It is best to turn on dev_dbg() in musb_cleanup_urb() with
+ * CPPI enabled to see the issue when aborting the tx channel.
+ */
+ if (dev_WARN_ONCE(musb->controller, retries-- < 1,
"Could not flush host TX%d fifo: csr: %04x\n",
ep->epnum, csr))
return;
- mdelay(1);
}
}
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 173132416170..22e8ecb6bfbd 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -21,7 +21,6 @@ config AB8500_USB
config FSL_USB2_OTG
bool "Freescale USB OTG Transceiver Driver"
depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM && PM
- select USB_OTG
select USB_PHY
help
Enable this to support Freescale USB OTG transceiver.
@@ -168,8 +167,7 @@ config USB_QCOM_8X16_PHY
config USB_MV_OTG
tristate "Marvell USB OTG support"
- depends on USB_EHCI_MV && USB_MV_UDC && PM
- select USB_OTG
+ depends on USB_EHCI_MV && USB_MV_UDC && PM && USB_OTG
select USB_PHY
help
Say Y here if you want to build Marvell USB OTG transceiver
diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index 80eb991c2506..0d19a6d61a71 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -1506,7 +1506,6 @@ static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg)
{
struct msm_otg_platform_data *pdata;
struct extcon_dev *ext_id, *ext_vbus;
- const struct of_device_id *id;
struct device_node *node = pdev->dev.of_node;
struct property *prop;
int len, ret, words;
@@ -1518,8 +1517,9 @@ static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg)
motg->pdata = pdata;
- id = of_match_device(msm_otg_dt_match, &pdev->dev);
- pdata->phy_type = (enum msm_usb_phy_type) id->data;
+ pdata->phy_type = (enum msm_usb_phy_type)of_device_get_match_data(&pdev->dev);
+ if (!pdata->phy_type)
+ return 1;
motg->link_rst = devm_reset_control_get(&pdev->dev, "link");
if (IS_ERR(motg->link_rst))
diff --git a/drivers/usb/phy/phy-mxs-usb.c b/drivers/usb/phy/phy-mxs-usb.c
index 4d863ebc117c..c2936dc48ca7 100644
--- a/drivers/usb/phy/phy-mxs-usb.c
+++ b/drivers/usb/phy/phy-mxs-usb.c
@@ -143,12 +143,17 @@ static const struct mxs_phy_data imx6sx_phy_data = {
.flags = MXS_PHY_DISCONNECT_LINE_WITHOUT_VBUS,
};
+static const struct mxs_phy_data imx6ul_phy_data = {
+ .flags = MXS_PHY_DISCONNECT_LINE_WITHOUT_VBUS,
+};
+
static const struct of_device_id mxs_phy_dt_ids[] = {
{ .compatible = "fsl,imx6sx-usbphy", .data = &imx6sx_phy_data, },
{ .compatible = "fsl,imx6sl-usbphy", .data = &imx6sl_phy_data, },
{ .compatible = "fsl,imx6q-usbphy", .data = &imx6q_phy_data, },
{ .compatible = "fsl,imx23-usbphy", .data = &imx23_phy_data, },
{ .compatible = "fsl,vf610-usbphy", .data = &vf610_phy_data, },
+ { .compatible = "fsl,imx6ul-usbphy", .data = &imx6ul_phy_data, },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, mxs_phy_dt_ids);
@@ -452,10 +457,13 @@ static int mxs_phy_probe(struct platform_device *pdev)
struct clk *clk;
struct mxs_phy *mxs_phy;
int ret;
- const struct of_device_id *of_id =
- of_match_device(mxs_phy_dt_ids, &pdev->dev);
+ const struct of_device_id *of_id;
struct device_node *np = pdev->dev.of_node;
+ of_id = of_match_device(mxs_phy_dt_ids, &pdev->dev);
+ if (!of_id)
+ return -ENODEV;
+
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
base = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(base))
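
Both PHY fixes here harden retrieval of the OF match data:
of_match_device() returns NULL when the device was not bound via the
match table, so its ->data must not be dereferenced blindly, and
of_device_get_match_data() folds the lookup and the data access into a
single call. A sketch of the two idioms, with a hypothetical my_dt_ids
table and my_data type:

    const struct of_device_id *of_id;
    const struct my_data *data;

    of_id = of_match_device(my_dt_ids, &pdev->dev);
    if (!of_id)
        return -ENODEV;        /* no match: don't touch of_id->data */
    data = of_id->data;

    /* or, equivalently, let the core do the lookup: */
    data = of_device_get_match_data(&pdev->dev);
    if (!data)
        return -ENODEV;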
diff --git a/drivers/usb/phy/phy-omap-otg.c b/drivers/usb/phy/phy-omap-otg.c
index 1270906ccb95..c4bf2de6d14e 100644
--- a/drivers/usb/phy/phy-omap-otg.c
+++ b/drivers/usb/phy/phy-omap-otg.c
@@ -105,7 +105,6 @@ static int omap_otg_probe(struct platform_device *pdev)
extcon = extcon_get_extcon_dev(config->extcon);
if (!extcon)
return -EPROBE_DEFER;
- otg_dev->extcon = extcon;
otg_dev = devm_kzalloc(&pdev->dev, sizeof(*otg_dev), GFP_KERNEL);
if (!otg_dev)
@@ -115,6 +114,7 @@ static int omap_otg_probe(struct platform_device *pdev)
if (IS_ERR(otg_dev->base))
return PTR_ERR(otg_dev->base);
+ otg_dev->extcon = extcon;
otg_dev->id_nb.notifier_call = omap_otg_id_notifier;
otg_dev->vbus_nb.notifier_call = omap_otg_vbus_notifier;
diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c
index de4f97d84a82..8f7a78e70975 100644
--- a/drivers/usb/renesas_usbhs/mod_gadget.c
+++ b/drivers/usb/renesas_usbhs/mod_gadget.c
@@ -131,7 +131,8 @@ static void __usbhsg_queue_pop(struct usbhsg_uep *uep,
struct device *dev = usbhsg_gpriv_to_dev(gpriv);
struct usbhs_priv *priv = usbhsg_gpriv_to_priv(gpriv);
- dev_dbg(dev, "pipe %d : queue pop\n", usbhs_pipe_number(pipe));
+ if (pipe)
+ dev_dbg(dev, "pipe %d : queue pop\n", usbhs_pipe_number(pipe));
ureq->req.status = status;
spin_unlock(usbhs_priv_to_lock(priv));
@@ -685,7 +686,13 @@ static int usbhsg_ep_dequeue(struct usb_ep *ep, struct usb_request *req)
struct usbhsg_request *ureq = usbhsg_req_to_ureq(req);
struct usbhs_pipe *pipe = usbhsg_uep_to_pipe(uep);
- usbhs_pkt_pop(pipe, usbhsg_ureq_to_pkt(ureq));
+ if (pipe)
+ usbhs_pkt_pop(pipe, usbhsg_ureq_to_pkt(ureq));
+
+ /*
+ * To dequeue a request, this driver must call usbhsg_queue_pop()
+ * even if the pipe is NULL.
+ */
usbhsg_queue_pop(uep, ureq, -ECONNRESET);
return 0;
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index eac7ccaa3c85..7d4f51a32e66 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -132,7 +132,6 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
{ USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */
{ USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */
- { USB_DEVICE(0x10C4, 0xEA80) }, /* Silicon Labs factory default */
{ USB_DEVICE(0x10C4, 0xEA71) }, /* Infinity GPS-MIC-1 Radio Monophone */
{ USB_DEVICE(0x10C4, 0xF001) }, /* Elan Digital Systems USBscope50 */
{ USB_DEVICE(0x10C4, 0xF002) }, /* Elan Digital Systems USBwave12 */
diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c
index f51a5d52c0ed..ec1b8f2c1183 100644
--- a/drivers/usb/serial/ipaq.c
+++ b/drivers/usb/serial/ipaq.c
@@ -531,7 +531,8 @@ static int ipaq_open(struct tty_struct *tty,
* through. Since this has a reasonably high failure rate, we retry
* several times.
*/
- while (retries--) {
+ while (retries) {
+ retries--;
result = usb_control_msg(serial->dev,
usb_sndctrlpipe(serial->dev, 0), 0x22, 0x21,
0x1, 0, NULL, 0, 100);
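
The ipaq.c change is a post-decrement pitfall: while (retries--) leaves
retries at -1 once all attempts are used, which breaks any follow-up
test for retries having reached zero. With the decrement moved into the
loop body, retries is exactly 0 after the final attempt. A minimal
illustration, with a hypothetical try_once() helper returning 0 on
success:

    int retries = 3, result = -1;

    while (retries) {
        retries--;
        result = try_once();
        if (result == 0)
            break;
    }
    if (!retries && result)    /* all attempts used and the last failed */
        return result;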
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 685fef71d3d1..f2280606b73c 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -161,6 +161,7 @@ static void option_instat_callback(struct urb *urb);
#define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED 0x9001
#define NOVATELWIRELESS_PRODUCT_E362 0x9010
#define NOVATELWIRELESS_PRODUCT_E371 0x9011
+#define NOVATELWIRELESS_PRODUCT_U620L 0x9022
#define NOVATELWIRELESS_PRODUCT_G2 0xA010
#define NOVATELWIRELESS_PRODUCT_MC551 0xB001
@@ -354,6 +355,7 @@ static void option_instat_callback(struct urb *urb);
/* This is the 4G XS Stick W14 a.k.a. Mobilcom Debitel Surf-Stick *
* It seems to contain a Qualcomm QSC6240/6290 chipset */
#define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603
+#define FOUR_G_SYSTEMS_PRODUCT_W100 0x9b01
/* iBall 3.5G connect wireless modem */
#define IBALL_3_5G_CONNECT 0x9605
@@ -519,6 +521,11 @@ static const struct option_blacklist_info four_g_w14_blacklist = {
.sendsetup = BIT(0) | BIT(1),
};
+static const struct option_blacklist_info four_g_w100_blacklist = {
+ .sendsetup = BIT(1) | BIT(2),
+ .reserved = BIT(3),
+};
+
static const struct option_blacklist_info alcatel_x200_blacklist = {
.sendsetup = BIT(0) | BIT(1),
.reserved = BIT(4),
@@ -1052,6 +1059,7 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) },
{ USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E362, 0xff, 0xff, 0xff) },
{ USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E371, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U620L, 0xff, 0x00, 0x00) },
{ USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) },
{ USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) },
@@ -1641,6 +1649,9 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14),
.driver_info = (kernel_ulong_t)&four_g_w14_blacklist
},
+ { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W100),
+ .driver_info = (kernel_ulong_t)&four_g_w100_blacklist
+ },
{ USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) },
{ USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) },
{ USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) },
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index 5022fcfa0260..9919d2a9faf2 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -22,6 +22,8 @@
#define DRIVER_AUTHOR "Qualcomm Inc"
#define DRIVER_DESC "Qualcomm USB Serial driver"
+#define QUECTEL_EC20_PID 0x9215
+
/* standard device layouts supported by this driver */
enum qcserial_layouts {
QCSERIAL_G2K = 0, /* Gobi 2000 */
@@ -171,6 +173,38 @@ static const struct usb_device_id id_table[] = {
};
MODULE_DEVICE_TABLE(usb, id_table);
+static int handle_quectel_ec20(struct device *dev, int ifnum)
+{
+ int altsetting = 0;
+
+ /*
+ * Quectel EC20 Mini PCIe LTE module layout:
+ * 0: DM/DIAG (use libqcdm from ModemManager for communication)
+ * 1: NMEA
+ * 2: AT-capable modem port
+ * 3: Modem interface
+ * 4: NDIS
+ */
+ switch (ifnum) {
+ case 0:
+ dev_dbg(dev, "Quectel EC20 DM/DIAG interface found\n");
+ break;
+ case 1:
+ dev_dbg(dev, "Quectel EC20 NMEA GPS interface found\n");
+ break;
+ case 2:
+ case 3:
+ dev_dbg(dev, "Quectel EC20 Modem port found\n");
+ break;
+ case 4:
+ /* Don't claim the QMI/net interface */
+ altsetting = -1;
+ break;
+ }
+
+ return altsetting;
+}
+
static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
{
struct usb_host_interface *intf = serial->interface->cur_altsetting;
@@ -181,6 +215,10 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
int altsetting = -1;
bool sendsetup = false;
+ /* we only support vendor specific functions */
+ if (intf->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC)
+ goto done;
+
nintf = serial->dev->actconfig->desc.bNumInterfaces;
dev_dbg(dev, "Num Interfaces = %d\n", nintf);
ifnum = intf->desc.bInterfaceNumber;
@@ -240,6 +278,12 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
altsetting = -1;
break;
case QCSERIAL_G2K:
+ /* handle non-standard layouts */
+ if (nintf == 5 && id->idProduct == QUECTEL_EC20_PID) {
+ altsetting = handle_quectel_ec20(dev, ifnum);
+ goto done;
+ }
+
/*
* Gobi 2K+ USB layout:
* 0: QMI/net
@@ -301,29 +345,39 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
break;
case QCSERIAL_HWI:
/*
- * Huawei layout:
- * 0: AT-capable modem port
- * 1: DM/DIAG
- * 2: AT-capable modem port
- * 3: CCID-compatible PCSC interface
- * 4: QMI/net
- * 5: NMEA
+ * Huawei devices map functions by subclass + protocol
+ * instead of interface numbers. The protocol identifies
+ * a specific function, while the subclass indicates a
+ * specific firmware source.
+ *
+ * This is a blacklist of functions known to be
+ * non-serial. The rest are assumed to be serial and
+ * will be handled by this driver.
*/
- switch (ifnum) {
- case 0:
- case 2:
- dev_dbg(dev, "Modem port found\n");
- break;
- case 1:
- dev_dbg(dev, "DM/DIAG interface found\n");
- break;
- case 5:
- dev_dbg(dev, "NMEA GPS interface found\n");
- break;
- default:
- /* don't claim any unsupported interface */
+ switch (intf->desc.bInterfaceProtocol) {
+ /* QMI combined (qmi_wwan) */
+ case 0x07:
+ case 0x37:
+ case 0x67:
+ /* QMI data (qmi_wwan) */
+ case 0x08:
+ case 0x38:
+ case 0x68:
+ /* QMI control (qmi_wwan) */
+ case 0x09:
+ case 0x39:
+ case 0x69:
+ /* NCM like (huawei_cdc_ncm) */
+ case 0x16:
+ case 0x46:
+ case 0x76:
altsetting = -1;
break;
+ default:
+ dev_dbg(dev, "Huawei type serial port found (%02x/%02x/%02x)\n",
+ intf->desc.bInterfaceClass,
+ intf->desc.bInterfaceSubClass,
+ intf->desc.bInterfaceProtocol);
}
break;
default:
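
A pattern worth noting in the Huawei blacklist above: each blacklisted
function appears at three protocol values spaced 0x30 apart (0x07/0x37/
0x67, 0x16/0x46/0x76, and so on), so the low part selects the function
while the 0x30 "bank" tracks the firmware source. That is an inference
from the table, not documented behaviour; a hypothetical compaction of
the same blacklist would look like:

    static bool huawei_function_is_serial(u8 proto)
    {
        switch (proto % 0x30) {
        case 0x07:    /* QMI combined (qmi_wwan) */
        case 0x08:    /* QMI data (qmi_wwan) */
        case 0x09:    /* QMI control (qmi_wwan) */
        case 0x16:    /* NCM like (huawei_cdc_ncm) */
            return false;
        default:
            return true;
        }
    }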
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index e9da41d9fe7f..2694df2f4559 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -159,6 +159,7 @@ static const struct usb_device_id ti_id_table_3410[] = {
{ USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STEREO_PLUG_ID) },
{ USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) },
{ USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) },
+ { USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) },
{ } /* terminator */
};
@@ -191,6 +192,7 @@ static const struct usb_device_id ti_id_table_combined[] = {
{ USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_PRODUCT_ID) },
{ USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) },
{ USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) },
+ { USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) },
{ } /* terminator */
};
diff --git a/drivers/usb/serial/ti_usb_3410_5052.h b/drivers/usb/serial/ti_usb_3410_5052.h
index 4a2423e84d55..98f35c656c02 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.h
+++ b/drivers/usb/serial/ti_usb_3410_5052.h
@@ -56,6 +56,10 @@
#define ABBOTT_PRODUCT_ID ABBOTT_STEREO_PLUG_ID
#define ABBOTT_STRIP_PORT_ID 0x3420
+/* Honeywell vendor and product IDs */
+#define HONEYWELL_VENDOR_ID 0x10ac
+#define HONEYWELL_HGI80_PRODUCT_ID 0x0102 /* Honeywell HGI80 */
+
/* Commands */
#define TI_GET_VERSION 0x01
#define TI_GET_PORT_STATUS 0x02
diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c
index 3658662898fc..a204782ae530 100644
--- a/drivers/usb/serial/usb-serial-simple.c
+++ b/drivers/usb/serial/usb-serial-simple.c
@@ -53,6 +53,7 @@ DEVICE(funsoft, FUNSOFT_IDS);
/* Infineon Flashloader driver */
#define FLASHLOADER_IDS() \
+ { USB_DEVICE_INTERFACE_CLASS(0x058b, 0x0041, USB_CLASS_CDC_DATA) }, \
{ USB_DEVICE(0x8087, 0x0716) }
DEVICE(flashloader, FLASHLOADER_IDS);
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index e69151664436..5c66d3f7a6d0 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -796,6 +796,10 @@ static int uas_slave_configure(struct scsi_device *sdev)
if (devinfo->flags & US_FL_NO_REPORT_OPCODES)
sdev->no_report_opcodes = 1;
+ /* A few buggy USB-ATA bridges don't understand FUA */
+ if (devinfo->flags & US_FL_BROKEN_FUA)
+ sdev->broken_fua = 1;
+
scsi_change_queue_depth(sdev, devinfo->qdepth - 2);
return 0;
}
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 6b2479123de7..7ffe4209067b 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -1987,7 +1987,7 @@ UNUSUAL_DEV( 0x14cd, 0x6600, 0x0201, 0x0201,
US_FL_IGNORE_RESIDUE ),
/* Reported by Michael Büsch <m@bues.ch> */
-UNUSUAL_DEV( 0x152d, 0x0567, 0x0114, 0x0114,
+UNUSUAL_DEV( 0x152d, 0x0567, 0x0114, 0x0116,
"JMicron",
"USB to ATA/ATAPI Bridge",
USB_SC_DEVICE, USB_PR_DEVICE, NULL,
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index c85ea530085f..ccc113e83d88 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -132,7 +132,7 @@ UNUSUAL_DEV(0x152d, 0x0567, 0x0000, 0x9999,
"JMicron",
"JMS567",
USB_SC_DEVICE, USB_PR_DEVICE, NULL,
- US_FL_NO_REPORT_OPCODES),
+ US_FL_BROKEN_FUA | US_FL_NO_REPORT_OPCODES),
/* Reported-by: Hans de Goede <hdegoede@redhat.com> */
UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index da6e2ce77495..850d86ca685b 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -31,21 +31,6 @@ menuconfig VFIO
If you don't know what to do here, say N.
-menuconfig VFIO_NOIOMMU
- bool "VFIO No-IOMMU support"
- depends on VFIO
- help
- VFIO is built on the ability to isolate devices using the IOMMU.
- Only with an IOMMU can userspace access to DMA capable devices be
- considered secure. VFIO No-IOMMU mode enables IOMMU groups for
- devices without IOMMU backing for the purpose of re-using the VFIO
- infrastructure in a non-secure mode. Use of this mode will result
- in an unsupportable kernel and will therefore taint the kernel.
- Device assignment to virtual machines is also not possible with
- this mode since there is no IOMMU to provide DMA translation.
-
- If you don't know what to do here, say N.
-
source "drivers/vfio/pci/Kconfig"
source "drivers/vfio/platform/Kconfig"
source "virt/lib/Kconfig"
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 32b88bd2c82c..56bf6dbb93db 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -940,13 +940,13 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
return -EINVAL;
- group = vfio_iommu_group_get(&pdev->dev);
+ group = iommu_group_get(&pdev->dev);
if (!group)
return -EINVAL;
vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
if (!vdev) {
- vfio_iommu_group_put(group, &pdev->dev);
+ iommu_group_put(group);
return -ENOMEM;
}
@@ -957,7 +957,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
if (ret) {
- vfio_iommu_group_put(group, &pdev->dev);
+ iommu_group_put(group);
kfree(vdev);
return ret;
}
@@ -993,7 +993,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
if (!vdev)
return;
- vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
+ iommu_group_put(pdev->dev.iommu_group);
kfree(vdev);
if (vfio_pci_is_vga(pdev)) {
@@ -1035,7 +1035,7 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
return PCI_ERS_RESULT_CAN_RECOVER;
}
-static struct pci_error_handlers vfio_err_handlers = {
+static const struct pci_error_handlers vfio_err_handlers = {
.error_detected = vfio_pci_aer_err_detected,
};
diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c
index f1625dcfbb23..b1cc3a768784 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -92,7 +92,6 @@ static struct platform_driver vfio_platform_driver = {
.remove = vfio_platform_remove,
.driver = {
.name = "vfio-platform",
- .owner = THIS_MODULE,
},
};
diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index a1c50d630792..418cdd9ba3f4 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -51,13 +51,10 @@ static vfio_platform_reset_fn_t vfio_platform_lookup_reset(const char *compat,
static void vfio_platform_get_reset(struct vfio_platform_device *vdev)
{
- char modname[256];
-
vdev->reset = vfio_platform_lookup_reset(vdev->compat,
&vdev->reset_module);
if (!vdev->reset) {
- snprintf(modname, 256, "vfio-reset:%s", vdev->compat);
- request_module(modname);
+ request_module("vfio-reset:%s", vdev->compat);
vdev->reset = vfio_platform_lookup_reset(vdev->compat,
&vdev->reset_module);
}
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index de632da2e22f..6070b793cbcb 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -62,7 +62,6 @@ struct vfio_container {
struct rw_semaphore group_lock;
struct vfio_iommu_driver *iommu_driver;
void *iommu_data;
- bool noiommu;
};
struct vfio_unbound_dev {
@@ -85,7 +84,6 @@ struct vfio_group {
struct list_head unbound_list;
struct mutex unbound_lock;
atomic_t opened;
- bool noiommu;
};
struct vfio_device {
@@ -97,147 +95,6 @@ struct vfio_device {
void *device_data;
};
-#ifdef CONFIG_VFIO_NOIOMMU
-static bool noiommu __read_mostly;
-module_param_named(enable_unsafe_noiommu_support,
- noiommu, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
-#endif
-
-/*
- * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
- * and remove functions, any use cases other than acquiring the first
- * reference for the purpose of calling vfio_add_group_dev() or removing
- * that symmetric reference after vfio_del_group_dev() should use the raw
- * iommu_group_{get,put} functions. In particular, vfio_iommu_group_put()
- * removes the device from the dummy group and cannot be nested.
- */
-struct iommu_group *vfio_iommu_group_get(struct device *dev)
-{
- struct iommu_group *group;
- int __maybe_unused ret;
-
- group = iommu_group_get(dev);
-
-#ifdef CONFIG_VFIO_NOIOMMU
- /*
- * With noiommu enabled, an IOMMU group will be created for a device
- * that doesn't already have one and doesn't have an iommu_ops on their
- * bus. We use iommu_present() again in the main code to detect these
- * fake groups.
- */
- if (group || !noiommu || iommu_present(dev->bus))
- return group;
-
- group = iommu_group_alloc();
- if (IS_ERR(group))
- return NULL;
-
- iommu_group_set_name(group, "vfio-noiommu");
- ret = iommu_group_add_device(group, dev);
- iommu_group_put(group);
- if (ret)
- return NULL;
-
- /*
- * Where to taint? At this point we've added an IOMMU group for a
- * device that is not backed by iommu_ops, therefore any iommu_
- * callback using iommu_ops can legitimately Oops. So, while we may
- * be about to give a DMA capable device to a user without IOMMU
- * protection, which is clearly taint-worthy, let's go ahead and do
- * it here.
- */
- add_taint(TAINT_USER, LOCKDEP_STILL_OK);
- dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
-#endif
-
- return group;
-}
-EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
-
-void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
-{
-#ifdef CONFIG_VFIO_NOIOMMU
- if (!iommu_present(dev->bus))
- iommu_group_remove_device(dev);
-#endif
-
- iommu_group_put(group);
-}
-EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
-
-#ifdef CONFIG_VFIO_NOIOMMU
-static void *vfio_noiommu_open(unsigned long arg)
-{
- if (arg != VFIO_NOIOMMU_IOMMU)
- return ERR_PTR(-EINVAL);
- if (!capable(CAP_SYS_RAWIO))
- return ERR_PTR(-EPERM);
-
- return NULL;
-}
-
-static void vfio_noiommu_release(void *iommu_data)
-{
-}
-
-static long vfio_noiommu_ioctl(void *iommu_data,
- unsigned int cmd, unsigned long arg)
-{
- if (cmd == VFIO_CHECK_EXTENSION)
- return arg == VFIO_NOIOMMU_IOMMU ? 1 : 0;
-
- return -ENOTTY;
-}
-
-static int vfio_iommu_present(struct device *dev, void *unused)
-{
- return iommu_present(dev->bus) ? 1 : 0;
-}
-
-static int vfio_noiommu_attach_group(void *iommu_data,
- struct iommu_group *iommu_group)
-{
- return iommu_group_for_each_dev(iommu_group, NULL,
- vfio_iommu_present) ? -EINVAL : 0;
-}
-
-static void vfio_noiommu_detach_group(void *iommu_data,
- struct iommu_group *iommu_group)
-{
-}
-
-static struct vfio_iommu_driver_ops vfio_noiommu_ops = {
- .name = "vfio-noiommu",
- .owner = THIS_MODULE,
- .open = vfio_noiommu_open,
- .release = vfio_noiommu_release,
- .ioctl = vfio_noiommu_ioctl,
- .attach_group = vfio_noiommu_attach_group,
- .detach_group = vfio_noiommu_detach_group,
-};
-
-static struct vfio_iommu_driver vfio_noiommu_driver = {
- .ops = &vfio_noiommu_ops,
-};
-
-/*
- * Wrap IOMMU drivers, the noiommu driver is the one and only driver for
- * noiommu groups (and thus containers) and not available for normal groups.
- */
-#define vfio_for_each_iommu_driver(con, pos) \
- for (pos = con->noiommu ? &vfio_noiommu_driver : \
- list_first_entry(&vfio.iommu_drivers_list, \
- struct vfio_iommu_driver, vfio_next); \
- (con->noiommu ? pos != NULL : \
- &pos->vfio_next != &vfio.iommu_drivers_list); \
- pos = con->noiommu ? NULL : list_next_entry(pos, vfio_next))
-#else
-#define vfio_for_each_iommu_driver(con, pos) \
- list_for_each_entry(pos, &vfio.iommu_drivers_list, vfio_next)
-#endif
-
-
/**
* IOMMU driver registration
*/
@@ -342,8 +199,7 @@ static void vfio_group_unlock_and_free(struct vfio_group *group)
/**
* Group objects - create, release, get, put, search
*/
-static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
- bool noiommu)
+static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
{
struct vfio_group *group, *tmp;
struct device *dev;
@@ -361,7 +217,6 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
atomic_set(&group->container_users, 0);
atomic_set(&group->opened, 0);
group->iommu_group = iommu_group;
- group->noiommu = noiommu;
group->nb.notifier_call = vfio_iommu_group_notifier;
@@ -397,8 +252,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
dev = device_create(vfio.class, NULL,
MKDEV(MAJOR(vfio.group_devt), minor),
- group, "%s%d", noiommu ? "noiommu-" : "",
- iommu_group_id(iommu_group));
+ group, "%d", iommu_group_id(iommu_group));
if (IS_ERR(dev)) {
vfio_free_group_minor(minor);
vfio_group_unlock_and_free(group);
@@ -682,7 +536,7 @@ static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
return 0;
/* TODO Prevent device auto probing */
- WARN("Device %s added to live group %d!\n", dev_name(dev),
+ WARN(1, "Device %s added to live group %d!\n", dev_name(dev),
iommu_group_id(group->iommu_group));
return 0;
@@ -786,8 +640,7 @@ int vfio_add_group_dev(struct device *dev,
group = vfio_group_get_from_iommu(iommu_group);
if (!group) {
- group = vfio_create_group(iommu_group,
- !iommu_present(dev->bus));
+ group = vfio_create_group(iommu_group);
if (IS_ERR(group)) {
iommu_group_put(iommu_group);
return PTR_ERR(group);
@@ -999,7 +852,8 @@ static long vfio_ioctl_check_extension(struct vfio_container *container,
*/
if (!driver) {
mutex_lock(&vfio.iommu_drivers_lock);
- vfio_for_each_iommu_driver(container, driver) {
+ list_for_each_entry(driver, &vfio.iommu_drivers_list,
+ vfio_next) {
if (!try_module_get(driver->ops->owner))
continue;
@@ -1068,7 +922,7 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
}
mutex_lock(&vfio.iommu_drivers_lock);
- vfio_for_each_iommu_driver(container, driver) {
+ list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
void *data;
if (!try_module_get(driver->ops->owner))
@@ -1333,9 +1187,6 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
if (atomic_read(&group->container_users))
return -EINVAL;
- if (group->noiommu && !capable(CAP_SYS_RAWIO))
- return -EPERM;
-
f = fdget(container_fd);
if (!f.file)
return -EBADF;
@@ -1351,13 +1202,6 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
down_write(&container->group_lock);
- /* Real groups and fake groups cannot mix */
- if (!list_empty(&container->group_list) &&
- container->noiommu != group->noiommu) {
- ret = -EPERM;
- goto unlock_out;
- }
-
driver = container->iommu_driver;
if (driver) {
ret = driver->ops->attach_group(container->iommu_data,
@@ -1367,7 +1211,6 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
}
group->container = container;
- container->noiommu = group->noiommu;
list_add(&group->container_next, &container->group_list);
/* Get a reference on the container and mark a user within the group */
@@ -1398,9 +1241,6 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
!group->container->iommu_driver || !vfio_group_viable(group))
return -EINVAL;
- if (group->noiommu && !capable(CAP_SYS_RAWIO))
- return -EPERM;
-
device = vfio_device_get_from_name(group, buf);
if (!device)
return -ENODEV;
@@ -1443,10 +1283,6 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
fd_install(ret, filep);
- if (group->noiommu)
- dev_warn(device->dev, "vfio-noiommu device opened by user "
- "(%s:%d)\n", current->comm, task_pid_nr(current));
-
return ret;
}
@@ -1535,11 +1371,6 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
if (!group)
return -ENODEV;
- if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
- vfio_group_put(group);
- return -EPERM;
- }
-
/* Do we need multiple instances of the group open? Seems not. */
opened = atomic_cmpxchg(&group->opened, 0, 1);
if (opened) {
@@ -1702,11 +1533,6 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep)
if (!atomic_inc_not_zero(&group->container_users))
return ERR_PTR(-EINVAL);
- if (group->noiommu) {
- atomic_dec(&group->container_users);
- return ERR_PTR(-EPERM);
- }
-
if (!group->container->iommu_driver ||
!vfio_group_viable(group)) {
atomic_dec(&group->container_users);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index eec2f11809ff..ad2146a9ab2d 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -819,7 +819,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE);
if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) ||
(a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) ||
- (a.log_guest_addr & (sizeof(u64) - 1))) {
+ (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1))) {
r = -EINVAL;
break;
}
@@ -1369,7 +1369,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
/* Grab the next descriptor number they're advertising, and increment
* the index we've seen. */
if (unlikely(__get_user(ring_head,
- &vq->avail->ring[last_avail_idx % vq->num]))) {
+ &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) {
vq_err(vq, "Failed to read head: idx %d address %p\n",
last_avail_idx,
&vq->avail->ring[last_avail_idx % vq->num]);
@@ -1489,7 +1489,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
u16 old, new;
int start;
- start = vq->last_used_idx % vq->num;
+ start = vq->last_used_idx & (vq->num - 1);
used = vq->used->ring + start;
if (count == 1) {
if (__put_user(heads[0].id, &used->id)) {
@@ -1531,7 +1531,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
{
int start, n, r;
- start = vq->last_used_idx % vq->num;
+ start = vq->last_used_idx & (vq->num - 1);
n = vq->num - start;
if (n < count) {
r = __vhost_add_used_n(vq, heads, n);
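
The vhost conversions from "% vq->num" to "& (vq->num - 1)" rely on
virtio ring sizes being powers of two, for which the two expressions are
equal and the mask avoids a division on the hot path. A standalone
illustration:

    unsigned int num = 256;    /* virtio queue sizes are powers of two */
    unsigned int idx = 1000;

    /* idx % num == 232 and idx & (num - 1) == 232: identical results */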
diff --git a/drivers/video/fbdev/fsl-diu-fb.c b/drivers/video/fbdev/fsl-diu-fb.c
index b335c1ae8625..fe00a07c122e 100644
--- a/drivers/video/fbdev/fsl-diu-fb.c
+++ b/drivers/video/fbdev/fsl-diu-fb.c
@@ -479,7 +479,10 @@ static enum fsl_diu_monitor_port fsl_diu_name_to_port(const char *s)
port = FSL_DIU_PORT_DLVDS;
}
- return diu_ops.valid_monitor_port(port);
+ if (diu_ops.valid_monitor_port)
+ port = diu_ops.valid_monitor_port(port);
+
+ return port;
}
/*
@@ -1915,6 +1918,14 @@ static int __init fsl_diu_init(void)
#else
monitor_port = fsl_diu_name_to_port(monitor_string);
#endif
+
+ /*
+ * We must verify set_pixel_clock: if it is not implemented on the
+ * platform, there is no platform support for the DIU.
+ */
+ if (!diu_ops.set_pixel_clock)
+ return -ENODEV;
+
pr_info("Freescale Display Interface Unit (DIU) framebuffer driver\n");
#ifdef CONFIG_NOT_COHERENT_CACHE
diff --git a/drivers/video/fbdev/omap2/dss/venc.c b/drivers/video/fbdev/omap2/dss/venc.c
index 99ca268c1cdd..d05a54922ba6 100644
--- a/drivers/video/fbdev/omap2/dss/venc.c
+++ b/drivers/video/fbdev/omap2/dss/venc.c
@@ -275,6 +275,12 @@ const struct omap_video_timings omap_dss_pal_timings = {
.vbp = 41,
.interlace = true,
+
+ .hsync_level = OMAPDSS_SIG_ACTIVE_LOW,
+ .vsync_level = OMAPDSS_SIG_ACTIVE_LOW,
+ .data_pclk_edge = OMAPDSS_DRIVE_SIG_RISING_EDGE,
+ .de_level = OMAPDSS_SIG_ACTIVE_HIGH,
+ .sync_pclk_edge = OMAPDSS_DRIVE_SIG_FALLING_EDGE,
};
EXPORT_SYMBOL(omap_dss_pal_timings);
@@ -290,6 +296,12 @@ const struct omap_video_timings omap_dss_ntsc_timings = {
.vbp = 31,
.interlace = true,
+
+ .hsync_level = OMAPDSS_SIG_ACTIVE_LOW,
+ .vsync_level = OMAPDSS_SIG_ACTIVE_LOW,
+ .data_pclk_edge = OMAPDSS_DRIVE_SIG_RISING_EDGE,
+ .de_level = OMAPDSS_SIG_ACTIVE_HIGH,
+ .sync_pclk_edge = OMAPDSS_DRIVE_SIG_FALLING_EDGE,
};
EXPORT_SYMBOL(omap_dss_ntsc_timings);
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index b1877d73fa56..7062bb0975a5 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -412,6 +412,7 @@ static int virtio_init(void)
static void __exit virtio_exit(void)
{
bus_unregister(&virtio_bus);
+ ida_destroy(&virtio_index_ida);
}
core_initcall(virtio_init);
module_exit(virtio_exit);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 096b857e7b75..ee663c458b20 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -80,6 +80,12 @@ struct vring_virtqueue {
/* Last used index we've seen. */
u16 last_used_idx;
+ /* Last written value to avail->flags */
+ u16 avail_flags_shadow;
+
+ /* Last written value to avail->idx in guest byte order */
+ u16 avail_idx_shadow;
+
/* How to notify other side. FIXME: commonalize hcalls! */
bool (*notify)(struct virtqueue *vq);
@@ -109,7 +115,7 @@ static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
* otherwise virt_to_phys will give us bogus addresses in the
* virtqueue.
*/
- gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
+ gfp &= ~__GFP_HIGHMEM;
desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
if (!desc)
@@ -235,13 +241,14 @@ static inline int virtqueue_add(struct virtqueue *_vq,
/* Put entry in available array (but don't update avail->idx until they
* do sync). */
- avail = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) & (vq->vring.num - 1);
+ avail = vq->avail_idx_shadow & (vq->vring.num - 1);
vq->vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
/* Descriptors and available array need to be set before we expose the
* new available array entries. */
virtio_wmb(vq->weak_barriers);
- vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) + 1);
+ vq->avail_idx_shadow++;
+ vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
vq->num_added++;
pr_debug("Added buffer head %i to %p\n", head, vq);
@@ -354,8 +361,8 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq)
* event. */
virtio_mb(vq->weak_barriers);
- old = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - vq->num_added;
- new = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx);
+ old = vq->avail_idx_shadow - vq->num_added;
+ new = vq->avail_idx_shadow;
vq->num_added = 0;
#ifdef DEBUG
@@ -510,7 +517,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
/* If we expect an interrupt for the next entry, tell host
* by writing event index and flush out the write before
* the read in the next get_buf call. */
- if (!(vq->vring.avail->flags & cpu_to_virtio16(_vq->vdev, VRING_AVAIL_F_NO_INTERRUPT))) {
+ if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, vq->last_used_idx);
virtio_mb(vq->weak_barriers);
}
@@ -537,7 +544,11 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
- vq->vring.avail->flags |= cpu_to_virtio16(_vq->vdev, VRING_AVAIL_F_NO_INTERRUPT);
+ if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
+ vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+ vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+ }
+
}
EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
@@ -565,7 +576,10 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
* either clear the flags bit or point the event index at the next
* entry. Always do both to keep code simple. */
- vq->vring.avail->flags &= cpu_to_virtio16(_vq->vdev, ~VRING_AVAIL_F_NO_INTERRUPT);
+ if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+ vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+ vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+ }
vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
END_USE(vq);
return last_used_idx;
@@ -633,9 +647,12 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
* either clear the flags bit or point the event index at the next
* entry. Always do both to keep code simple. */
- vq->vring.avail->flags &= cpu_to_virtio16(_vq->vdev, ~VRING_AVAIL_F_NO_INTERRUPT);
+ if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+ vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+ vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+ }
/* TODO: tune this threshold */
- bufs = (u16)(virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - vq->last_used_idx) * 3 / 4;
+ bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs);
virtio_mb(vq->weak_barriers);
if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) {
@@ -670,7 +687,8 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
/* detach_buf clears data, so grab it now. */
buf = vq->data[i];
detach_buf(vq, i);
- vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - 1);
+ vq->avail_idx_shadow--;
+ vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
END_USE(vq);
return buf;
}
@@ -735,6 +753,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
vq->weak_barriers = weak_barriers;
vq->broken = false;
vq->last_used_idx = 0;
+ vq->avail_flags_shadow = 0;
+ vq->avail_idx_shadow = 0;
vq->num_added = 0;
list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
@@ -746,8 +766,10 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
/* No callback? Tell other side not to bother us. */
- if (!callback)
- vq->vring.avail->flags |= cpu_to_virtio16(vdev, VRING_AVAIL_F_NO_INTERRUPT);
+ if (!callback) {
+ vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+ vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
+ }
/* Put everything in free lists. */
vq->free_head = 0;
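
The new avail_flags_shadow/avail_idx_shadow fields are guest-private
copies of fields that live in memory shared with the host: the hot path
reads and updates the shadow, and writes the shared copy only when the
value actually changes, avoiding reads of (and redundant stores to)
cache lines the host may also be touching. The write-through pattern in
isolation, with hypothetical shadow/shared/SOME_FLAG names:

    u16 shadow = 0;                      /* private copy, guest byte order */

    if (!(shadow & SOME_FLAG)) {
        shadow |= SOME_FLAG;             /* update the private copy */
        shared->flags = cpu_to_le16(shadow);    /* publish exactly once */
    }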
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 7a8a6c6952e9..1c427beffadd 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -446,7 +446,7 @@ config MAX63XX_WATCHDOG
config IMX2_WDT
tristate "IMX2+ Watchdog"
- depends on ARCH_MXC
+ depends on ARCH_MXC || ARCH_LAYERSCAPE
select REGMAP_MMIO
select WATCHDOG_CORE
help
diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c
index 6ad9df948711..b751f43d76ed 100644
--- a/drivers/watchdog/mtk_wdt.c
+++ b/drivers/watchdog/mtk_wdt.c
@@ -123,6 +123,7 @@ static int mtk_wdt_stop(struct watchdog_device *wdt_dev)
reg = readl(wdt_base + WDT_MODE);
reg &= ~WDT_MODE_EN;
+ reg |= WDT_MODE_KEY;
iowrite32(reg, wdt_base + WDT_MODE);
return 0;
diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c
index d96bee017fd3..6f17c935a6cf 100644
--- a/drivers/watchdog/omap_wdt.c
+++ b/drivers/watchdog/omap_wdt.c
@@ -205,7 +205,7 @@ static int omap_wdt_set_timeout(struct watchdog_device *wdog,
static unsigned int omap_wdt_get_timeleft(struct watchdog_device *wdog)
{
- struct omap_wdt_dev *wdev = watchdog_get_drvdata(wdog);
+ struct omap_wdt_dev *wdev = to_omap_wdt_dev(wdog);
void __iomem *base = wdev->base;
u32 value;
diff --git a/drivers/watchdog/pnx4008_wdt.c b/drivers/watchdog/pnx4008_wdt.c
index 4224b3ec83a5..313cd1c6fda0 100644
--- a/drivers/watchdog/pnx4008_wdt.c
+++ b/drivers/watchdog/pnx4008_wdt.c
@@ -80,7 +80,7 @@ static unsigned int heartbeat = DEFAULT_HEARTBEAT;
static DEFINE_SPINLOCK(io_lock);
static void __iomem *wdt_base;
-struct clk *wdt_clk;
+static struct clk *wdt_clk;
static int pnx4008_wdt_start(struct watchdog_device *wdd)
{
@@ -161,7 +161,7 @@ static int pnx4008_wdt_probe(struct platform_device *pdev)
if (IS_ERR(wdt_clk))
return PTR_ERR(wdt_clk);
- ret = clk_enable(wdt_clk);
+ ret = clk_prepare_enable(wdt_clk);
if (ret)
return ret;
@@ -184,7 +184,7 @@ static int pnx4008_wdt_probe(struct platform_device *pdev)
return 0;
disable_clk:
- clk_disable(wdt_clk);
+ clk_disable_unprepare(wdt_clk);
return ret;
}
@@ -192,7 +192,7 @@ static int pnx4008_wdt_remove(struct platform_device *pdev)
{
watchdog_unregister_device(&pnx4008_wdd);
- clk_disable(wdt_clk);
+ clk_disable_unprepare(wdt_clk);
return 0;
}
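
On common-clock-framework platforms clk_enable() alone is not enough: a
clock must first be prepared (a step that may sleep) and only then
enabled (an atomic step). clk_prepare_enable() and
clk_disable_unprepare() bundle the two phases, which is what the pnx4008
conversion uses. A sketch of the paired calls:

    struct clk *clk;
    int ret;

    clk = devm_clk_get(&pdev->dev, NULL);
    if (IS_ERR(clk))
        return PTR_ERR(clk);

    ret = clk_prepare_enable(clk);    /* prepare (may sleep), then enable */
    if (ret)
        return ret;

    /* ... device is clocked ... */

    clk_disable_unprepare(clk);       /* mirror both phases on teardown */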
diff --git a/drivers/watchdog/tegra_wdt.c b/drivers/watchdog/tegra_wdt.c
index 7f97cdd53f29..9ec57608da82 100644
--- a/drivers/watchdog/tegra_wdt.c
+++ b/drivers/watchdog/tegra_wdt.c
@@ -140,8 +140,10 @@ static int tegra_wdt_set_timeout(struct watchdog_device *wdd,
{
wdd->timeout = timeout;
- if (watchdog_active(wdd))
+ if (watchdog_active(wdd)) {
+ tegra_wdt_stop(wdd);
return tegra_wdt_start(wdd);
+ }
return 0;
}
diff --git a/drivers/watchdog/w83977f_wdt.c b/drivers/watchdog/w83977f_wdt.c
index 91bf55a20024..20e2bba10400 100644
--- a/drivers/watchdog/w83977f_wdt.c
+++ b/drivers/watchdog/w83977f_wdt.c
@@ -224,7 +224,7 @@ static int wdt_keepalive(void)
static int wdt_set_timeout(int t)
{
- int tmrval;
+ unsigned int tmrval;
/*
* Convert seconds to watchdog counter time units, rounding up.
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 849500e4e14d..524c22146429 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -39,6 +39,7 @@
#include <asm/irq.h>
#include <asm/idle.h>
#include <asm/io_apic.h>
+#include <asm/i8259.h>
#include <asm/xen/pci.h>
#endif
#include <asm/sync_bitops.h>
@@ -420,7 +421,7 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
return xen_allocate_irq_dynamic();
/* Legacy IRQ descriptors are already allocated by the arch. */
- if (gsi < NR_IRQS_LEGACY)
+ if (gsi < nr_legacy_irqs())
irq = gsi;
else
irq = irq_alloc_desc_at(gsi, -1);
@@ -446,7 +447,7 @@ static void xen_free_irq(unsigned irq)
kfree(info);
/* Legacy IRQ descriptors are managed by the arch. */
- if (irq < NR_IRQS_LEGACY)
+ if (irq < nr_legacy_irqs())
return;
irq_free_desc(irq);
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index e3e9e3d46d1b..96a1b8da5371 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -281,7 +281,8 @@ static void handle_irq_for_port(unsigned port)
static void consume_one_event(unsigned cpu,
struct evtchn_fifo_control_block *control_block,
- unsigned priority, unsigned long *ready)
+ unsigned priority, unsigned long *ready,
+ bool drop)
{
struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
uint32_t head;
@@ -313,13 +314,17 @@ static void consume_one_event(unsigned cpu,
if (head == 0)
clear_bit(priority, ready);
- if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port))
- handle_irq_for_port(port);
+ if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
+ if (unlikely(drop))
+ pr_warn("Dropping pending event for port %u\n", port);
+ else
+ handle_irq_for_port(port);
+ }
q->head[priority] = head;
}
-static void evtchn_fifo_handle_events(unsigned cpu)
+static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
{
struct evtchn_fifo_control_block *control_block;
unsigned long ready;
@@ -331,11 +336,16 @@ static void evtchn_fifo_handle_events(unsigned cpu)
while (ready) {
q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
- consume_one_event(cpu, control_block, q, &ready);
+ consume_one_event(cpu, control_block, q, &ready, drop);
ready |= xchg(&control_block->ready, 0);
}
}
+static void evtchn_fifo_handle_events(unsigned cpu)
+{
+ __evtchn_fifo_handle_events(cpu, false);
+}
+
static void evtchn_fifo_resume(void)
{
unsigned cpu;
@@ -420,6 +430,9 @@ static int evtchn_fifo_cpu_notification(struct notifier_block *self,
if (!per_cpu(cpu_control_block, cpu))
ret = evtchn_fifo_alloc_control_block(cpu);
break;
+ case CPU_DEAD:
+ __evtchn_fifo_handle_events(cpu, true);
+ break;
default:
break;
}
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 00f40f051d95..38272ad24551 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -49,6 +49,8 @@
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
#include <xen/xen.h>
#include <xen/events.h>
@@ -58,10 +60,10 @@
struct per_user_data {
struct mutex bind_mutex; /* serialize bind/unbind operations */
struct rb_root evtchns;
+ unsigned int nr_evtchns;
/* Notification ring, accessed via /dev/xen/evtchn. */
-#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
-#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+ unsigned int ring_size;
evtchn_port_t *ring;
unsigned int ring_cons, ring_prod, ring_overflow;
struct mutex ring_cons_mutex; /* protect against concurrent readers */
@@ -80,10 +82,41 @@ struct user_evtchn {
bool enabled;
};
+static evtchn_port_t *evtchn_alloc_ring(unsigned int size)
+{
+ evtchn_port_t *ring;
+ size_t s = size * sizeof(*ring);
+
+ ring = kmalloc(s, GFP_KERNEL);
+ if (!ring)
+ ring = vmalloc(s);
+
+ return ring;
+}
+
+static void evtchn_free_ring(evtchn_port_t *ring)
+{
+ kvfree(ring);
+}
+
+static unsigned int evtchn_ring_offset(struct per_user_data *u,
+ unsigned int idx)
+{
+ return idx & (u->ring_size - 1);
+}
+
+static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u,
+ unsigned int idx)
+{
+ return u->ring + evtchn_ring_offset(u, idx);
+}
+
static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
+ u->nr_evtchns++;
+
while (*new) {
struct user_evtchn *this;
@@ -107,6 +140,7 @@ static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
+ u->nr_evtchns--;
rb_erase(&evtchn->node, &u->evtchns);
kfree(evtchn);
}
@@ -144,8 +178,8 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
spin_lock(&u->ring_prod_lock);
- if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
- u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
+ if ((u->ring_prod - u->ring_cons) < u->ring_size) {
+ *evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
wmb(); /* Ensure ring contents visible */
if (u->ring_cons == u->ring_prod++) {
wake_up_interruptible(&u->evtchn_wait);
@@ -200,10 +234,10 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
}
/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
- if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
- bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
+ if (((c ^ p) & u->ring_size) != 0) {
+ bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) *
sizeof(evtchn_port_t);
- bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
+ bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t);
} else {
bytes1 = (p - c) * sizeof(evtchn_port_t);
bytes2 = 0;
@@ -219,7 +253,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
rc = -EFAULT;
rmb(); /* Ensure that we see the port before we copy it. */
- if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
+ if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
((bytes2 != 0) &&
copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
goto unlock_out;
@@ -278,6 +312,66 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
return rc;
}
+static int evtchn_resize_ring(struct per_user_data *u)
+{
+ unsigned int new_size;
+ evtchn_port_t *new_ring, *old_ring;
+ unsigned int p, c;
+
+ /*
+ * Ensure the ring is large enough to capture all possible
+ * events, i.e., one free slot for each bound event.
+ */
+ if (u->nr_evtchns <= u->ring_size)
+ return 0;
+
+ if (u->ring_size == 0)
+ new_size = 64;
+ else
+ new_size = 2 * u->ring_size;
+
+ new_ring = evtchn_alloc_ring(new_size);
+ if (!new_ring)
+ return -ENOMEM;
+
+ old_ring = u->ring;
+
+ /*
+ * Access to the ring contents is serialized by either the
+ * prod /or/ cons lock, so take both when resizing.
+ */
+ mutex_lock(&u->ring_cons_mutex);
+ spin_lock_irq(&u->ring_prod_lock);
+
+ /*
+ * Copy the old ring contents to the new ring.
+ *
+ * To take care of wrapping, a full ring, and free-running
+ * indices whose new offset may land in the second half, simply
+ * copy the old contents twice: any index then finds its entry
+ * at idx & (new_size - 1).
+ *
+ * +---------+ +------------------+
+ * |34567 12| -> |34567 1234567 12|
+ * +-----p-c-+ +-------c------p---+
+ */
+ memcpy(new_ring, old_ring, u->ring_size * sizeof(*u->ring));
+ memcpy(new_ring + u->ring_size, old_ring,
+ u->ring_size * sizeof(*u->ring));
+
+ u->ring = new_ring;
+ u->ring_size = new_size;
+
+ spin_unlock_irq(&u->ring_prod_lock);
+ mutex_unlock(&u->ring_cons_mutex);
+
+ evtchn_free_ring(old_ring);
+
+ return 0;
+}
+
static int evtchn_bind_to_user(struct per_user_data *u, int port)
{
struct user_evtchn *evtchn;
@@ -305,6 +399,10 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
if (rc < 0)
goto err;
+ rc = evtchn_resize_ring(u);
+ if (rc < 0)
+ goto err;
+
rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
u->name, evtchn);
if (rc < 0)
@@ -503,13 +601,6 @@ static int evtchn_open(struct inode *inode, struct file *filp)
init_waitqueue_head(&u->evtchn_wait);
- u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
- if (u->ring == NULL) {
- kfree(u->name);
- kfree(u);
- return -ENOMEM;
- }
-
mutex_init(&u->bind_mutex);
mutex_init(&u->ring_cons_mutex);
spin_lock_init(&u->ring_prod_lock);
@@ -532,7 +623,7 @@ static int evtchn_release(struct inode *inode, struct file *filp)
evtchn_unbind_from_user(u, evtchn);
}
- free_page((unsigned long)u->ring);
+ evtchn_free_ring(u->ring);
kfree(u->name);
kfree(u);
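The resize above relies on two invariants: the ring size is always a power of two, so evtchn_ring_offset() can mask instead of divide, and doubling the ring while copying the old contents twice keeps the free-running cons/prod indices valid, because any index now resolves to idx & (2*size - 1), which is either its old offset or its old offset plus the old size, and both slots hold the same entry after the duplication. A compilable userspace model of the pattern (hypothetical names; plain malloc stands in for the kmalloc-with-vmalloc-fallback allocator):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct ring {
        uint32_t *slots;
        unsigned int size;              /* always a power of two (or 0) */
        unsigned int cons, prod;        /* free-running indices */
};

static unsigned int ring_offset(const struct ring *r, unsigned int idx)
{
        return idx & (r->size - 1);
}

/* Double the ring, preserving cons/prod, by duplicating the old contents:
 * index i now resolves to i & (2*S - 1), i.e. either its old offset j or
 * j + S, and both slots hold the same (correct) value after the copy. */
static int ring_grow(struct ring *r)
{
        unsigned int new_size = r->size ? 2 * r->size : 64;
        uint32_t *n = malloc(new_size * sizeof(*n));

        if (!n)
                return -1;
        if (r->size) {
                memcpy(n, r->slots, r->size * sizeof(*n));
                memcpy(n + r->size, r->slots, r->size * sizeof(*n));
        }
        free(r->slots);
        r->slots = n;
        r->size = new_size;
        return 0;
}

int main(void)
{
        struct ring r = { 0 };

        assert(ring_grow(&r) == 0);             /* 0 -> 64 */
        for (unsigned int i = 0; i < 60; i++)
                r.slots[ring_offset(&r, r.prod++)] = i;
        assert(ring_grow(&r) == 0);             /* 64 -> 128 */
        while (r.cons != r.prod) {              /* entries survive the resize */
                assert(r.slots[ring_offset(&r, r.cons)] == r.cons);
                r.cons++;
        }
        free(r.slots);
        return 0;
}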
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 2ea0b3b2a91d..1be5dd048622 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -804,7 +804,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
vma->vm_ops = &gntdev_vmops;
- vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
if (use_ptemod)
vma->vm_flags |= VM_DONTCOPY;
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index 58e38d586f52..4d529f3e40df 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -37,6 +37,7 @@ struct xen_pcibk_device {
struct xen_pci_sharedinfo *sh_info;
unsigned long flags;
struct work_struct op_work;
+ struct xen_pci_op op;
};
struct xen_pcibk_dev_data {
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index c4a0666de6f5..73dafdc494aa 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -70,6 +70,13 @@ static void xen_pcibk_control_isr(struct pci_dev *dev, int reset)
enable ? "enable" : "disable");
if (enable) {
+ /*
+ * A device with MSI or MSI-X enabled must not get a legacy IRQ
+ * handler here. Otherwise, if the guest terminates, we hit a
+ * BUG_ON in free_msi_irqs().
+ */
+ if (dev->msi_enabled || dev->msix_enabled)
+ goto out;
+
rc = request_irq(dev_data->irq,
xen_pcibk_guest_interrupt, IRQF_SHARED,
dev_data->irq_name, dev);
@@ -144,7 +151,12 @@ int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));
- status = pci_enable_msi(dev);
+ if (dev->msi_enabled)
+ status = -EALREADY;
+ else if (dev->msix_enabled)
+ status = -ENXIO;
+ else
+ status = pci_enable_msi(dev);
if (status) {
pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n",
@@ -173,20 +185,23 @@ static
int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
- struct xen_pcibk_dev_data *dev_data;
-
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n",
pci_name(dev));
- pci_disable_msi(dev);
+ if (dev->msi_enabled) {
+ struct xen_pcibk_dev_data *dev_data;
+
+ pci_disable_msi(dev);
+
+ dev_data = pci_get_drvdata(dev);
+ if (dev_data)
+ dev_data->ack_intr = 1;
+ }
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
op->value);
- dev_data = pci_get_drvdata(dev);
- if (dev_data)
- dev_data->ack_intr = 1;
return 0;
}
@@ -197,13 +212,26 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
struct xen_pcibk_dev_data *dev_data;
int i, result;
struct msix_entry *entries;
+ u16 cmd;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
pci_name(dev));
+
if (op->value > SH_INFO_MAX_VEC)
return -EINVAL;
+ if (dev->msix_enabled)
+ return -EALREADY;
+
+ /*
+ * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able
+ * to access the BARs where the MSI-X entries reside.
+ */
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY))
+ return -ENXIO;
+
entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
if (entries == NULL)
return -ENOMEM;
@@ -245,23 +273,27 @@ static
int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
- struct xen_pcibk_dev_data *dev_data;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n",
pci_name(dev));
- pci_disable_msix(dev);
+ if (dev->msix_enabled) {
+ struct xen_pcibk_dev_data *dev_data;
+
+ pci_disable_msix(dev);
+
+ dev_data = pci_get_drvdata(dev);
+ if (dev_data)
+ dev_data->ack_intr = 1;
+ }
/*
* SR-IOV devices (which don't have any legacy IRQ) have
* an undefined IRQ value of zero.
*/
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
if (unlikely(verbose_request))
- printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev),
- op->value);
- dev_data = pci_get_drvdata(dev);
- if (dev_data)
- dev_data->ack_intr = 1;
+ printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n",
+ pci_name(dev), op->value);
return 0;
}
#endif
@@ -298,9 +330,11 @@ void xen_pcibk_do_op(struct work_struct *data)
container_of(data, struct xen_pcibk_device, op_work);
struct pci_dev *dev;
struct xen_pcibk_dev_data *dev_data = NULL;
- struct xen_pci_op *op = &pdev->sh_info->op;
+ struct xen_pci_op *op = &pdev->op;
int test_intx = 0;
+ *op = pdev->sh_info->op;
+ barrier();
dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
if (dev == NULL)
@@ -342,6 +376,17 @@ void xen_pcibk_do_op(struct work_struct *data)
if ((dev_data->enable_intx != test_intx))
xen_pcibk_control_isr(dev, 0 /* no reset */);
}
+ pdev->sh_info->op.err = op->err;
+ pdev->sh_info->op.value = op->value;
+#ifdef CONFIG_PCI_MSI
+ if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) {
+ unsigned int i;
+
+ for (i = 0; i < op->value; i++)
+ pdev->sh_info->op.msix_entries[i].vector =
+ op->msix_entries[i].vector;
+ }
+#endif
/* Tell the driver domain that we're done. */
wmb();
clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
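The guards added above all follow one rule: a guest-driven interrupt-mode change is honoured only from a consistent state, since the frontend cannot be trusted to sequence enable/disable requests correctly. A minimal userspace model of that state machine (the error choices mirror the hunks above; everything else is hypothetical):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct pdev_state {
        bool msi_enabled;
        bool msix_enabled;
        bool memory_decode;     /* PCI_COMMAND_MEMORY */
};

static int enable_msi(struct pdev_state *s)
{
        if (s->msi_enabled)
                return -EALREADY;       /* duplicate request */
        if (s->msix_enabled)
                return -ENXIO;          /* conflicting mode */
        s->msi_enabled = true;
        return 0;
}

static int enable_msix(struct pdev_state *s)
{
        if (s->msix_enabled)
                return -EALREADY;
        /* MSI-X tables live in a BAR: memory decoding must be on. */
        if (s->msi_enabled || !s->memory_decode)
                return -ENXIO;
        s->msix_enabled = true;
        return 0;
}

int main(void)
{
        struct pdev_state s = { .memory_decode = true };

        printf("msi:  %d\n", enable_msi(&s));   /* 0 */
        printf("msix: %d\n", enable_msix(&s));  /* -ENXIO: MSI already on */
        return 0;
}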
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 98bc345f296e..4843741e703a 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -44,7 +44,6 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
pdev->xdev = xdev;
- dev_set_drvdata(&xdev->dev, pdev);
mutex_init(&pdev->dev_lock);
@@ -58,6 +57,9 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
kfree(pdev);
pdev = NULL;
}
+
+ dev_set_drvdata(&xdev->dev, pdev);
+
out:
return pdev;
}
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 43bcae852546..ad4eb1024d1f 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -726,7 +726,7 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
if (!pending_req)
return 1;
- ring_req = *RING_GET_REQUEST(ring, rc);
+ RING_COPY_REQUEST(ring, rc, &ring_req);
ring->req_cons = ++rc;
err = prepare_pending_reqs(info, &ring_req, pending_req);
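Both the op snapshot in xen_pcibk_do_op() above (*op = pdev->sh_info->op followed by barrier()) and the switch to RING_COPY_REQUEST() here close the same class of bug: memory shared with a guest can change between validation and use, so every check must run against a private copy. A small sketch of the rule, assuming a compiler barrier suffices for the single-reader case being modelled:

#include <stdio.h>

/* "shared" is writable by the other side at any time, so all checks and
 * uses go through a private snapshot. The compiler barrier mirrors the
 * barrier() after the structure copy in xen_pcibk_do_op(). */
struct req {
        unsigned int op;
        unsigned int len;
};

static int handle(volatile struct req *shared)
{
        struct req local;

        local.op = shared->op;                  /* snapshot each field once */
        local.len = shared->len;
        __asm__ __volatile__("" ::: "memory");  /* ...and never re-read */

        if (local.len > 4096)                   /* validate the snapshot... */
                return -1;
        return (int)local.len;                  /* ...and use the same snapshot */
}

int main(void)
{
        struct req r = { .op = 1, .len = 42 };

        printf("%d\n", handle(&r));
        return 0;
}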
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index a7e28890f5ef..9da967f38387 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -67,8 +67,8 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
return 0;
}
/* get the default/access acl values and cache them */
- dacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_DEFAULT);
- pacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_ACCESS);
+ dacl = __v9fs_get_acl(fid, XATTR_NAME_POSIX_ACL_DEFAULT);
+ pacl = __v9fs_get_acl(fid, XATTR_NAME_POSIX_ACL_ACCESS);
if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
@@ -133,10 +133,10 @@ static int v9fs_set_acl(struct p9_fid *fid, int type, struct posix_acl *acl)
goto err_free_out;
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
BUG();
@@ -220,15 +220,12 @@ static int v9fs_xattr_get_acl(const struct xattr_handler *handler,
struct posix_acl *acl;
int error;
- if (strcmp(name, "") != 0)
- return -EINVAL;
-
v9ses = v9fs_dentry2v9ses(dentry);
/*
* We allow set/get/list of ACLs when access=client is not specified.
*/
if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
- return v9fs_xattr_get(dentry, handler->prefix, buffer, size);
+ return v9fs_xattr_get(dentry, handler->name, buffer, size);
acl = v9fs_get_cached_acl(d_inode(dentry), handler->flags);
if (IS_ERR(acl))
@@ -250,16 +247,13 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
struct v9fs_session_info *v9ses;
struct inode *inode = d_inode(dentry);
- if (strcmp(name, "") != 0)
- return -EINVAL;
-
v9ses = v9fs_dentry2v9ses(dentry);
/*
* Set the attribute on the remote without even looking at the
* xattr value; we leave validation to the server.
*/
if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
- return v9fs_xattr_set(dentry, handler->prefix, value, size,
+ return v9fs_xattr_set(dentry, handler->name, value, size,
flags);
if (S_ISLNK(inode->i_mode))
@@ -319,7 +313,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
default:
BUG();
}
- retval = v9fs_xattr_set(dentry, handler->prefix, value, size, flags);
+ retval = v9fs_xattr_set(dentry, handler->name, value, size, flags);
if (!retval)
set_cached_acl(inode, handler->flags, acl);
err_out:
@@ -328,14 +322,14 @@ err_out:
}
const struct xattr_handler v9fs_xattr_acl_access_handler = {
- .prefix = POSIX_ACL_XATTR_ACCESS,
+ .name = XATTR_NAME_POSIX_ACL_ACCESS,
.flags = ACL_TYPE_ACCESS,
.get = v9fs_xattr_get_acl,
.set = v9fs_xattr_set_acl,
};
const struct xattr_handler v9fs_xattr_acl_default_handler = {
- .prefix = POSIX_ACL_XATTR_DEFAULT,
+ .name = XATTR_NAME_POSIX_ACL_DEFAULT,
.flags = ACL_TYPE_DEFAULT,
.get = v9fs_xattr_get_acl,
.set = v9fs_xattr_set_acl,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 699941e90667..c7cc7c30f0c8 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -451,9 +451,9 @@ void v9fs_evict_inode(struct inode *inode)
{
struct v9fs_inode *v9inode = V9FS_I(inode);
- truncate_inode_pages_final(inode->i_mapping);
+ truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
- filemap_fdatawrite(inode->i_mapping);
+ filemap_fdatawrite(&inode->i_data);
v9fs_cache_inode_put_cookie(inode);
/* clunk the fid stashed in writeback_fid */
@@ -1223,18 +1223,26 @@ ino_t v9fs_qid2ino(struct p9_qid *qid)
}
/**
- * v9fs_vfs_follow_link - follow a symlink path
+ * v9fs_vfs_get_link - follow a symlink path
* @dentry: dentry for symlink
- * @cookie: place to pass the data to put_link()
+ * @inode: inode for symlink
+ * @done: delayed call for when we are done with the return value
*/
-static const char *v9fs_vfs_follow_link(struct dentry *dentry, void **cookie)
+static const char *v9fs_vfs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct v9fs_session_info *v9ses = v9fs_dentry2v9ses(dentry);
- struct p9_fid *fid = v9fs_fid_lookup(dentry);
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid;
struct p9_wstat *st;
char *res;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
+ v9ses = v9fs_dentry2v9ses(dentry);
+ fid = v9fs_fid_lookup(dentry);
p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
if (IS_ERR(fid))
@@ -1259,7 +1267,8 @@ static const char *v9fs_vfs_follow_link(struct dentry *dentry, void **cookie)
p9stat_free(st);
kfree(st);
- return *cookie = res;
+ set_delayed_call(done, kfree_link, res);
+ return res;
}
/**
@@ -1452,8 +1461,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
static const struct inode_operations v9fs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = v9fs_vfs_follow_link,
- .put_link = kfree_put_link,
+ .get_link = v9fs_vfs_get_link,
.getattr = v9fs_vfs_getattr,
.setattr = v9fs_vfs_setattr,
};
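The conversions in this series all target the same ->get_link() contract: return the link body, register its destructor through a delayed_call that the VFS runs once it is done with the name, and bail out with -ECHILD in RCU-walk mode (dentry == NULL) because the lookup may sleep. A compilable userspace model of the caller/callee handshake (struct delayed_call mirrors the kernel's two-field layout; the rest is hypothetical):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct delayed_call {
        void (*fn)(void *);
        void *arg;
};

static void set_delayed_call(struct delayed_call *dc,
                             void (*fn)(void *), void *arg)
{
        dc->fn = fn;
        dc->arg = arg;
}

static void do_delayed_call(struct delayed_call *dc)
{
        if (dc->fn)
                dc->fn(dc->arg);
}

static const char *get_link(struct delayed_call *done)
{
        char *target = strdup("/some/target"); /* p9_client_readlink() stand-in */

        if (!target)
                return NULL;
        set_delayed_call(done, free, target);  /* kfree_link equivalent */
        return target;
}

int main(void)
{
        struct delayed_call done = { 0 };
        const char *link = get_link(&done);

        if (link)
                printf("-> %s\n", link);
        do_delayed_call(&done);                /* VFS side: release when done */
        return 0;
}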
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index cb899af1babc..a34702c998f5 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -899,26 +899,34 @@ error:
}
/**
- * v9fs_vfs_follow_link_dotl - follow a symlink path
+ * v9fs_vfs_get_link_dotl - follow a symlink path
* @dentry: dentry for symlink
- * @cookie: place to pass the data to put_link()
+ * @inode: inode for symlink
+ * @done: destructor for return value
*/
static const char *
-v9fs_vfs_follow_link_dotl(struct dentry *dentry, void **cookie)
+v9fs_vfs_get_link_dotl(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct p9_fid *fid = v9fs_fid_lookup(dentry);
+ struct p9_fid *fid;
char *target;
int retval;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
+ fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
return ERR_CAST(fid);
retval = p9_client_readlink(fid, &target);
if (retval)
return ERR_PTR(retval);
- return *cookie = target;
+ set_delayed_call(done, kfree_link, target);
+ return target;
}
int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
@@ -984,8 +992,7 @@ const struct inode_operations v9fs_file_inode_operations_dotl = {
const struct inode_operations v9fs_symlink_inode_operations_dotl = {
.readlink = generic_readlink,
- .follow_link = v9fs_vfs_follow_link_dotl,
- .put_link = kfree_put_link,
+ .get_link = v9fs_vfs_get_link_dotl,
.getattr = v9fs_vfs_getattr_dotl,
.setattr = v9fs_vfs_setattr_dotl,
.setxattr = generic_setxattr,
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index e3d026ac382e..9dd9b47a6c1a 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -143,8 +143,6 @@ static int v9fs_xattr_handler_get(const struct xattr_handler *handler,
{
const char *full_name = xattr_full_name(handler, name);
- if (strcmp(name, "") == 0)
- return -EINVAL;
return v9fs_xattr_get(dentry, full_name, buffer, size);
}
@@ -154,8 +152,6 @@ static int v9fs_xattr_handler_set(const struct xattr_handler *handler,
{
const char *full_name = xattr_full_name(handler, name);
- if (strcmp(name, "") == 0)
- return -EINVAL;
return v9fs_xattr_set(dentry, full_name, value, size, flags);
}
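The deleted empty-name checks were made redundant by how handler dispatch passes names: the handler receives only the suffix after its matched prefix (and with exact .name matching, as in the ACL handlers above, the suffix can never be empty), yet the suffix pointer still points into the caller's full string, so xattr_full_name() can recover the full name by pointer arithmetic alone. A sketch of that trick, assuming the toy handler layout below:

#include <assert.h>
#include <stdio.h>
#include <string.h>

struct xattr_handler {
        const char *prefix;
};

/* The suffix points into the original string, so subtracting the prefix
 * length recovers the full name without any copy. */
static const char *xattr_full_name(const struct xattr_handler *handler,
                                   const char *name)
{
        return name - strlen(handler->prefix);
}

int main(void)
{
        static const struct xattr_handler user_handler = { .prefix = "user." };
        const char *full = "user.mime_type";
        const char *suffix = full + strlen(user_handler.prefix);

        assert(strcmp(xattr_full_name(&user_handler, suffix), full) == 0);
        printf("%s\n", xattr_full_name(&user_handler, suffix));
        return 0;
}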
diff --git a/fs/Kconfig b/fs/Kconfig
index da3f32f1a4e4..6ce72d8d1ee1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -46,6 +46,12 @@ config FS_DAX
or if unsure, say N. Saying Y will increase the size of the kernel
by about 5kB.
+config FS_DAX_PMD
+ bool
+ default FS_DAX
+ depends on FS_DAX
+ depends on BROKEN
+
endif # BLOCK
# Posix ACL utility routines
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 17349500592d..0fdb0f5b2239 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -140,6 +140,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
break;
case ST_SOFTLINK:
inode->i_mode |= S_IFLNK;
+ inode_nohighmem(inode);
inode->i_op = &affs_symlink_inode_operations;
inode->i_data.a_ops = &affs_symlink_aops;
break;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 181e05b46e72..00d3002a6780 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -344,6 +344,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
return -ENOSPC;
inode->i_op = &affs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &affs_symlink_aops;
inode->i_mode = S_IFLNK | 0777;
mode_to_prot(inode);
diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c
index ea5b69a18ba9..69b03dbb792f 100644
--- a/fs/affs/symlink.c
+++ b/fs/affs/symlink.c
@@ -14,13 +14,13 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
{
struct buffer_head *bh;
struct inode *inode = page->mapping->host;
- char *link = kmap(page);
+ char *link = page_address(page);
struct slink_front *lf;
int i, j;
char c;
char lc;
- pr_debug("follow_link(ino=%lu)\n", inode->i_ino);
+ pr_debug("get_link(ino=%lu)\n", inode->i_ino);
bh = affs_bread(inode->i_sb, inode->i_ino);
if (!bh)
@@ -57,12 +57,10 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
link[i] = '\0';
affs_brelse(bh);
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
fail:
SetPageError(page);
- kunmap(page);
unlock_page(page);
return -EIO;
}
@@ -73,7 +71,6 @@ const struct address_space_operations affs_symlink_aops = {
const struct inode_operations affs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = affs_notify_change,
};
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index e06f5a23352a..86cc7264c21c 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -56,6 +56,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
case AFS_FTYPE_SYMLINK:
inode->i_mode = S_IFLNK | vnode->status.mode;
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
break;
default:
printk("kAFS: AFS vnode with undefined type\n");
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c
index da0c33481bc0..84e037d1d129 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -12,10 +12,16 @@
#include "autofs_i.h"
-static const char *autofs4_follow_link(struct dentry *dentry, void **cookie)
+static const char *autofs4_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
- struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ struct autofs_sb_info *sbi;
+ struct autofs_info *ino;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+ sbi = autofs4_sbi(dentry->d_sb);
+ ino = autofs4_dentry_ino(dentry);
if (ino && !autofs4_oz_mode(sbi))
ino->last_used = jiffies;
return d_inode(dentry)->i_private;
@@ -23,5 +29,5 @@ static const char *autofs4_follow_link(struct dentry *dentry, void **cookie)
const struct inode_operations autofs4_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = autofs4_follow_link
+ .get_link = autofs4_get_link
};
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 46aedacfa6a8..25250fa87086 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -42,7 +42,7 @@ static struct inode *befs_iget(struct super_block *, unsigned long);
static struct inode *befs_alloc_inode(struct super_block *sb);
static void befs_destroy_inode(struct inode *inode);
static void befs_destroy_inodecache(void);
-static const char *befs_follow_link(struct dentry *, void **);
+static int befs_symlink_readpage(struct file *, struct page *);
static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
char **out, int *out_len);
static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
@@ -79,10 +79,8 @@ static const struct address_space_operations befs_aops = {
.bmap = befs_bmap,
};
-static const struct inode_operations befs_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = befs_follow_link,
- .put_link = kfree_put_link,
+static const struct address_space_operations befs_symlink_aops = {
+ .readpage = befs_symlink_readpage,
};
/*
@@ -398,7 +396,9 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
inode->i_fop = &befs_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
- inode->i_op = &befs_symlink_inode_operations;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
+ inode->i_mapping->a_ops = &befs_symlink_aops;
} else {
inode->i_link = befs_ino->i_data.symlink;
inode->i_op = &simple_symlink_inode_operations;
@@ -463,31 +463,33 @@ befs_destroy_inodecache(void)
* The data stream becomes the link name, unless the LONG_SYMLINK
* flag is set.
*/
-static const char *
-befs_follow_link(struct dentry *dentry, void **cookie)
+static int befs_symlink_readpage(struct file *unused, struct page *page)
{
- struct super_block *sb = dentry->d_sb;
- struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry));
+ struct inode *inode = page->mapping->host;
+ struct super_block *sb = inode->i_sb;
+ struct befs_inode_info *befs_ino = BEFS_I(inode);
befs_data_stream *data = &befs_ino->i_data.ds;
befs_off_t len = data->size;
- char *link;
+ char *link = page_address(page);
- if (len == 0) {
+ if (len == 0 || len > PAGE_SIZE) {
befs_error(sb, "Long symlink with illegal length");
- return ERR_PTR(-EIO);
+ goto fail;
}
befs_debug(sb, "Follow long symlink");
- link = kmalloc(len, GFP_NOFS);
- if (!link)
- return ERR_PTR(-ENOMEM);
if (befs_read_lsymlink(sb, data, link, len) != len) {
- kfree(link);
befs_error(sb, "Failed to read entire long symlink");
- return ERR_PTR(-EIO);
+ goto fail;
}
link[len - 1] = '\0';
- return *cookie = link;
+ SetPageUptodate(page);
+ unlock_page(page);
+ return 0;
+fail:
+ SetPageError(page);
+ unlock_page(page);
+ return -EIO;
}
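befs now follows the generic pattern used across this series: the symlink body is materialised by ->readpage() into the page cache and consumed by page_get_link(), which uses page_address() directly; that is also why symlink inodes get inode_nohighmem() throughout these patches. A userspace model of the readpage side, with a plain buffer standing in for the page (the length checks mirror the hunk above; names are hypothetical):

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

/* Copy the link body into the page, NUL-terminate it, and only then
 * report it usable; bad lengths fail instead of overrunning the page. */
static int symlink_readpage(char page[PAGE_SIZE],
                            const char *disk_data, size_t len)
{
        if (len == 0 || len > PAGE_SIZE)
                return -1;              /* "illegal length" */
        memcpy(page, disk_data, len);
        page[len - 1] = '\0';           /* on-disk form counts the NUL */
        return 0;                       /* SetPageUptodate equivalent */
}

int main(void)
{
        char page[PAGE_SIZE];

        if (symlink_readpage(page, "target", 7) == 0)
                printf("-> %s\n", page);
        return 0;
}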
/*
diff --git a/fs/block_dev.c b/fs/block_dev.c
index bb0dfb1c7af1..44d4a1e9244e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -390,9 +390,17 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
struct page *page)
{
const struct block_device_operations *ops = bdev->bd_disk->fops;
+ int result = -EOPNOTSUPP;
+
if (!ops->rw_page || bdev_get_integrity(bdev))
- return -EOPNOTSUPP;
- return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+ return result;
+
+ result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+ if (result)
+ return result;
+ result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+ blk_queue_exit(bdev->bd_queue);
+ return result;
}
EXPORT_SYMBOL_GPL(bdev_read_page);
@@ -421,14 +429,20 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
int result;
int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
const struct block_device_operations *ops = bdev->bd_disk->fops;
+
if (!ops->rw_page || bdev_get_integrity(bdev))
return -EOPNOTSUPP;
+ result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+ if (result)
+ return result;
+
set_page_writeback(page);
result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
if (result)
end_page_writeback(page);
else
unlock_page(page);
+ blk_queue_exit(bdev->bd_queue);
return result;
}
EXPORT_SYMBOL_GPL(bdev_write_page);
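The bdev_read_page()/bdev_write_page() changes bracket ->rw_page() with blk_queue_enter()/blk_queue_exit() so the request queue cannot be torn down while a page I/O is in flight. A toy model of that usage-reference bracket (a plain counter and flag in place of the real per-CPU reference; names are hypothetical):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct queue {
        int usage;
        bool dying;
};

static int queue_enter(struct queue *q)
{
        if (q->dying)
                return -ENODEV;
        q->usage++;
        return 0;
}

static void queue_exit(struct queue *q)
{
        q->usage--;
}

static int rw_page(struct queue *q)
{
        int ret = queue_enter(q);

        if (ret)
                return ret;     /* propagate, do not touch q */
        /* ... the ops->rw_page() body would run here ... */
        queue_exit(q);
        return 0;
}

int main(void)
{
        struct queue q = { 0 };

        printf("%d\n", rw_page(&q));    /* 0 */
        q.dying = true;
        printf("%d\n", rw_page(&q));    /* -ENODEV */
        return 0;
}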
@@ -1509,11 +1523,14 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
WARN_ON_ONCE(bdev->bd_holders);
sync_blockdev(bdev);
kill_bdev(bdev);
+
+ bdev_write_inode(bdev);
/*
- * ->release can cause the queue to disappear, so flush all
- * dirty data before.
+ * Detaching bdev inode from its wb in __destroy_inode()
+ * is too late: the queue which embeds its bdi (along with
+ * root wb) can be gone as soon as we put_disk() below.
*/
- bdev_write_inode(bdev);
+ inode_detach_wb(bdev->bd_inode);
}
if (bdev->bd_contains == bdev) {
if (disk->fops->release)
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 9a0124a95851..f89db0c21b51 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -37,10 +37,10 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
BUG();
@@ -81,7 +81,7 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
ret = posix_acl_equiv_mode(acl, &inode->i_mode);
if (ret < 0)
@@ -94,7 +94,7 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
case ACL_TYPE_DEFAULT:
if (!S_ISDIR(inode->i_mode))
return acl ? -EINVAL : 0;
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
return -EINVAL;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 6dcdb2ec9211..d453d62ab0c6 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -355,7 +355,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
index = srcu_read_lock(&fs_info->subvol_srcu);
- root = btrfs_read_fs_root_no_name(fs_info, &root_key);
+ root = btrfs_get_fs_root(fs_info, &root_key, false);
if (IS_ERR(root)) {
srcu_read_unlock(&fs_info->subvol_srcu, index);
ret = PTR_ERR(root);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8c58191249cc..35489e7129a7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3416,6 +3416,7 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *btrfs_lookup_block_group(
struct btrfs_fs_info *info,
u64 bytenr);
+void btrfs_get_block_group(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int get_block_group_index(struct btrfs_block_group_cache *cache);
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
@@ -3479,6 +3480,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytes_used,
u64 type, u64 chunk_objectid, u64 chunk_offset,
u64 size);
+struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
+ struct btrfs_fs_info *fs_info,
+ const u64 chunk_offset);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start,
struct extent_map *em);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 974be09e7556..42a378a4eefb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -923,7 +923,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags)
if (bio_flags & EXTENT_BIO_TREE_LOG)
return 0;
#ifdef CONFIG_X86
- if (cpu_has_xmm4_2)
+ if (static_cpu_has_safe(X86_FEATURE_XMM4_2))
return 0;
#endif
return 1;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index acf3ed11cfb6..c4661db2b72a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -124,7 +124,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
return (cache->flags & bits) == bits;
}
-static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
+void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
atomic_inc(&cache->count);
}
@@ -5915,19 +5915,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
set_extent_dirty(info->pinned_extents,
bytenr, bytenr + num_bytes - 1,
GFP_NOFS | __GFP_NOFAIL);
- /*
- * No longer have used bytes in this block group, queue
- * it for deletion.
- */
- if (old_val == 0) {
- spin_lock(&info->unused_bgs_lock);
- if (list_empty(&cache->bg_list)) {
- btrfs_get_block_group(cache);
- list_add_tail(&cache->bg_list,
- &info->unused_bgs);
- }
- spin_unlock(&info->unused_bgs_lock);
- }
}
spin_lock(&trans->transaction->dirty_bgs_lock);
@@ -5939,6 +5926,22 @@ static int update_block_group(struct btrfs_trans_handle *trans,
}
spin_unlock(&trans->transaction->dirty_bgs_lock);
+ /*
+ * No longer have used bytes in this block group, queue it for
+ * deletion. We do this after adding the block group to the
+ * dirty list to avoid races between cleaner kthread and space
+ * cache writeout.
+ */
+ if (!alloc && old_val == 0) {
+ spin_lock(&info->unused_bgs_lock);
+ if (list_empty(&cache->bg_list)) {
+ btrfs_get_block_group(cache);
+ list_add_tail(&cache->bg_list,
+ &info->unused_bgs);
+ }
+ spin_unlock(&info->unused_bgs_lock);
+ }
+
btrfs_put_block_group(cache);
total -= num_bytes;
bytenr += num_bytes;
@@ -8105,21 +8108,47 @@ reada:
}
/*
- * TODO: Modify related function to add related node/leaf to dirty_extent_root,
- * for later qgroup accounting.
- *
- * Current, this function does nothing.
+ * These may not be seen by the usual inc/dec ref code so we have to
+ * add them here.
*/
+static int record_one_subtree_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes)
+{
+ struct btrfs_qgroup_extent_record *qrecord;
+ struct btrfs_delayed_ref_root *delayed_refs;
+
+ qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS);
+ if (!qrecord)
+ return -ENOMEM;
+
+ qrecord->bytenr = bytenr;
+ qrecord->num_bytes = num_bytes;
+ qrecord->old_roots = NULL;
+
+ delayed_refs = &trans->transaction->delayed_refs;
+ spin_lock(&delayed_refs->lock);
+ if (btrfs_qgroup_insert_dirty_extent(delayed_refs, qrecord))
+ kfree(qrecord);
+ spin_unlock(&delayed_refs->lock);
+
+ return 0;
+}
+
static int account_leaf_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *eb)
{
int nr = btrfs_header_nritems(eb);
- int i, extent_type;
+ int i, extent_type, ret;
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
u64 bytenr, num_bytes;
+ /* We can be called directly from walk_up_proc() */
+ if (!root->fs_info->quota_enabled)
+ return 0;
+
for (i = 0; i < nr; i++) {
btrfs_item_key_to_cpu(eb, &key, i);
@@ -8138,6 +8167,10 @@ static int account_leaf_items(struct btrfs_trans_handle *trans,
continue;
num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
+
+ ret = record_one_subtree_extent(trans, root, bytenr, num_bytes);
+ if (ret)
+ return ret;
}
return 0;
}
@@ -8206,8 +8239,6 @@ static int adjust_slots_upwards(struct btrfs_root *root,
/*
* root_eb is the subtree root and is locked before this function is called.
- * TODO: Modify this function to mark all (including complete shared node)
- * to dirty_extent_root to allow it get accounted in qgroup.
*/
static int account_shared_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -8285,6 +8316,11 @@ walk_down:
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
+
+ ret = record_one_subtree_extent(trans, root, child_bytenr,
+ root->nodesize);
+ if (ret)
+ goto out;
}
if (level == 0) {
@@ -10256,6 +10292,47 @@ out:
return ret;
}
+struct btrfs_trans_handle *
+btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
+ const u64 chunk_offset)
+{
+ struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
+ struct extent_map *em;
+ struct map_lookup *map;
+ unsigned int num_items;
+
+ read_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, chunk_offset, 1);
+ read_unlock(&em_tree->lock);
+ ASSERT(em && em->start == chunk_offset);
+
+ /*
+ * We need to reserve 3 + N units from the metadata space info in order
+ * to remove a block group (done at btrfs_remove_chunk() and at
+ * btrfs_remove_block_group()), which are used for:
+ *
+ * 1 unit for adding the free space inode's orphan (located in the tree
+ * of tree roots).
+ * 1 unit for deleting the block group item (located in the extent
+ * tree).
+ * 1 unit for deleting the free space item (located in tree of tree
+ * roots).
+ * N units for deleting N device extent items corresponding to each
+ * stripe (located in the device tree).
+ *
+ * In order to remove a block group we also need to reserve units in the
+ * system space info in order to update the chunk tree (update one or
+ * more device items and remove one chunk item), but this is done at
+ * btrfs_remove_chunk() through a call to check_system_chunk().
+ */
+ map = (struct map_lookup *)em->bdev;
+ num_items = 3 + map->num_stripes;
+ free_extent_map(em);
+
+ return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
+ num_items, 1);
+}
+
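The arithmetic from the comment above, as a checkable sketch (the item counts are the ones listed in the comment; the stripe count is an example):

#include <stdio.h>

/* Removing a block group touches one orphan item, one block group item
 * and one free space item, plus one device extent item per stripe,
 * hence 3 + num_stripes metadata units. */
static unsigned int remove_block_group_items(unsigned int num_stripes)
{
        return 3 + num_stripes;
}

int main(void)
{
        /* e.g. a chunk striped over 4 devices needs 7 metadata units */
        printf("%u\n", remove_block_group_items(4));
        return 0;
}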
/*
* Process the unused_bgs list and remove any that don't have any allocated
* space inside of them.
@@ -10322,8 +10399,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* Want to do this before we do anything else so we can recover
* properly if we fail to join the transaction.
*/
- /* 1 for btrfs_orphan_reserve_metadata() */
- trans = btrfs_start_transaction(root, 1);
+ trans = btrfs_start_trans_remove_block_group(fs_info,
+ block_group->key.objectid);
if (IS_ERR(trans)) {
btrfs_dec_block_group_ro(root, block_group);
ret = PTR_ERR(trans);
@@ -10403,11 +10480,15 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* until transaction commit to do the actual discard.
*/
if (trimming) {
- WARN_ON(!list_empty(&block_group->bg_list));
- spin_lock(&trans->transaction->deleted_bgs_lock);
+ spin_lock(&fs_info->unused_bgs_lock);
+ /*
+ * A concurrent scrub might have added us to the list
+ * fs_info->unused_bgs, so use a list_move operation
+ * to add the block group to the deleted_bgs list.
+ */
list_move(&block_group->bg_list,
&trans->transaction->deleted_bgs);
- spin_unlock(&trans->transaction->deleted_bgs_lock);
+ spin_unlock(&fs_info->unused_bgs_lock);
btrfs_get_block_group(block_group);
}
end_trans:
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 977e715f0bf2..0f09526aa7d9 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1291,7 +1291,8 @@ out:
* on error we return an unlocked page and the error value
* on success we return a locked page and 0
*/
-static int prepare_uptodate_page(struct page *page, u64 pos,
+static int prepare_uptodate_page(struct inode *inode,
+ struct page *page, u64 pos,
bool force_uptodate)
{
int ret = 0;
@@ -1306,6 +1307,10 @@ static int prepare_uptodate_page(struct page *page, u64 pos,
unlock_page(page);
return -EIO;
}
+ if (page->mapping != inode->i_mapping) {
+ unlock_page(page);
+ return -EAGAIN;
+ }
}
return 0;
}
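The new -EAGAIN path matters because waiting for a locked page can race with truncation or invalidation: by the time the page is up to date it may no longer belong to the file's mapping, and the only correct response is to drop it and look it up again. A minimal model of that retry loop (the race is simulated; names are hypothetical):

#include <stdbool.h>
#include <stdio.h>

struct page {
        bool valid;             /* stands in for page->mapping == i_mapping */
};

static struct page *lookup(bool *raced)
{
        static struct page stale = { .valid = false };
        static struct page fresh = { .valid = true };

        if (*raced) {           /* first lookup loses the race once */
                *raced = false;
                return &stale;
        }
        return &fresh;
}

int main(void)
{
        bool raced = true;
        struct page *p;

again:
        p = lookup(&raced);
        if (!p->valid)
                goto again;     /* the -EAGAIN path: drop and re-lookup */
        printf("page ok\n");
        return 0;
}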
@@ -1324,6 +1329,7 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages,
int faili;
for (i = 0; i < num_pages; i++) {
+again:
pages[i] = find_or_create_page(inode->i_mapping, index + i,
mask | __GFP_WRITE);
if (!pages[i]) {
@@ -1333,13 +1339,17 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages,
}
if (i == 0)
- err = prepare_uptodate_page(pages[i], pos,
+ err = prepare_uptodate_page(inode, pages[i], pos,
force_uptodate);
- if (i == num_pages - 1)
- err = prepare_uptodate_page(pages[i],
+ if (!err && i == num_pages - 1)
+ err = prepare_uptodate_page(inode, pages[i],
pos + write_bytes, false);
if (err) {
page_cache_release(pages[i]);
+ if (err == -EAGAIN) {
+ err = 0;
+ goto again;
+ }
faili = i - 1;
goto fail;
}
@@ -1882,8 +1892,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct btrfs_log_ctx ctx;
int ret = 0;
bool full_sync = 0;
- const u64 len = end - start + 1;
+ u64 len;
+ /*
+ * The range length can be represented by u64; do the typecasts to
+ * avoid signed overflow if it's [0, LLONG_MAX], e.g. from fsync().
+ */
+ len = (u64)end - (u64)start + 1;
trace_btrfs_sync_file(file, datasync);
/*
@@ -2071,8 +2086,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
}
}
if (!full_sync) {
- ret = btrfs_wait_ordered_range(inode, start,
- end - start + 1);
+ ret = btrfs_wait_ordered_range(inode, start, len);
if (ret) {
btrfs_end_transaction(trans, root);
goto out;
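The typecasts above can be checked in isolation: with start == 0 and end == LLONG_MAX, the signed expression end - start + 1 would overflow, while the unsigned form yields the intended 2^63 length.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        int64_t start = 0, end = INT64_MAX;     /* fsync() passes [0, LLONG_MAX] */

        /* Signed end - start + 1 would overflow (undefined behaviour);
         * the unsigned form, as in btrfs_sync_file(), is well defined: */
        uint64_t len = (uint64_t)end - (uint64_t)start + 1;

        printf("%llu\n", (unsigned long long)len);  /* 9223372036854775808 */
        return 0;
}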
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 85a1f8621b51..cfe99bec49de 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -891,7 +891,7 @@ out:
spin_unlock(&block_group->lock);
ret = 0;
- btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuild it now",
+ btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuilding it now",
block_group->key.objectid);
}
@@ -2972,7 +2972,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
u64 cont1_bytes, u64 min_bytes)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *entry;
+ struct btrfs_free_space *entry = NULL;
int ret = -ENOSPC;
u64 bitmap_offset = offset_to_bitmap(ctl, offset);
@@ -2983,8 +2983,10 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
* The bitmap that covers offset won't be in the list unless offset
* is just its start offset.
*/
- entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
- if (entry->offset != bitmap_offset) {
+ if (!list_empty(bitmaps))
+ entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
+
+ if (!entry || entry->offset != bitmap_offset) {
entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
if (entry && list_empty(&entry->list))
list_add(&entry->list, bitmaps);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 994490d5fa64..3b8856e182ae 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3550,10 +3550,10 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
int scanned = 0;
if (!xattr_access) {
- xattr_access = btrfs_name_hash(POSIX_ACL_XATTR_ACCESS,
- strlen(POSIX_ACL_XATTR_ACCESS));
- xattr_default = btrfs_name_hash(POSIX_ACL_XATTR_DEFAULT,
- strlen(POSIX_ACL_XATTR_DEFAULT));
+ xattr_access = btrfs_name_hash(XATTR_NAME_POSIX_ACL_ACCESS,
+ strlen(XATTR_NAME_POSIX_ACL_ACCESS));
+ xattr_default = btrfs_name_hash(XATTR_NAME_POSIX_ACL_DEFAULT,
+ strlen(XATTR_NAME_POSIX_ACL_DEFAULT));
}
slot++;
@@ -3774,6 +3774,7 @@ cache_acl:
break;
case S_IFLNK:
inode->i_op = &btrfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &btrfs_symlink_aops;
break;
default:
@@ -4046,9 +4047,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
*/
static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
{
- struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
- int ret;
/*
* 1 for the possible orphan item
@@ -4057,27 +4056,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
* 1 for the inode ref
* 1 for the inode
*/
- trans = btrfs_start_transaction(root, 5);
- if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
- return trans;
-
- if (PTR_ERR(trans) == -ENOSPC) {
- u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
-
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans))
- return trans;
- ret = btrfs_cond_migrate_bytes(root->fs_info,
- &root->fs_info->trans_block_rsv,
- num_bytes, 5);
- if (ret) {
- btrfs_end_transaction(trans, root);
- return ERR_PTR(ret);
- }
- trans->block_rsv = &root->fs_info->trans_block_rsv;
- trans->bytes_reserved = num_bytes;
- }
- return trans;
+ return btrfs_start_transaction_fallback_global_rsv(root, 5, 5);
}
static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
@@ -9727,6 +9706,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
btrfs_free_path(path);
inode->i_op = &btrfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &btrfs_symlink_aops;
inode_set_bytes(inode, name_len);
btrfs_i_size_write(inode, name_len);
@@ -10016,7 +9996,7 @@ static const struct inode_operations btrfs_dir_inode_operations = {
.setattr = btrfs_setattr,
.mknod = btrfs_mknod,
.setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
+ .getxattr = generic_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
@@ -10093,7 +10073,7 @@ static const struct inode_operations btrfs_file_inode_operations = {
.getattr = btrfs_getattr,
.setattr = btrfs_setattr,
.setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
+ .getxattr = generic_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
@@ -10107,7 +10087,7 @@ static const struct inode_operations btrfs_special_inode_operations = {
.setattr = btrfs_setattr,
.permission = btrfs_permission,
.setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
+ .getxattr = generic_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.get_acl = btrfs_get_acl,
@@ -10116,13 +10096,12 @@ static const struct inode_operations btrfs_special_inode_operations = {
};
static const struct inode_operations btrfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.getattr = btrfs_getattr,
.setattr = btrfs_setattr,
.permission = btrfs_permission,
.setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
+ .getxattr = generic_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.update_time = btrfs_update_time,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 93e12c18ffd7..5279fdae7142 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -993,9 +993,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_root)
goto out;
- spin_lock(&fs_info->qgroup_lock);
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
+ btrfs_qgroup_wait_for_completion(fs_info);
+ spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
fs_info->quota_root = NULL;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
@@ -1461,6 +1462,8 @@ struct btrfs_qgroup_extent_record
struct btrfs_qgroup_extent_record *entry;
u64 bytenr = record->bytenr;
+ assert_spin_locked(&delayed_refs->lock);
+
while (*p) {
parent_node = *p;
entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 2907a77fb1f6..b091d94ceef6 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3432,7 +3432,9 @@ out:
static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
struct btrfs_device *scrub_dev,
u64 chunk_offset, u64 length,
- u64 dev_offset, int is_dev_replace)
+ u64 dev_offset,
+ struct btrfs_block_group_cache *cache,
+ int is_dev_replace)
{
struct btrfs_mapping_tree *map_tree =
&sctx->dev_root->fs_info->mapping_tree;
@@ -3445,8 +3447,18 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
read_unlock(&map_tree->map_tree.lock);
- if (!em)
- return -EINVAL;
+ if (!em) {
+ /*
+ * Might have been an unused block group deleted by the cleaner
+ * kthread or relocation.
+ */
+ spin_lock(&cache->lock);
+ if (!cache->removed)
+ ret = -EINVAL;
+ spin_unlock(&cache->lock);
+
+ return ret;
+ }
map = (struct map_lookup *)em->bdev;
if (em->start != chunk_offset)
@@ -3483,6 +3495,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
u64 length;
u64 chunk_offset;
int ret = 0;
+ int ro_set;
int slot;
struct extent_buffer *l;
struct btrfs_key key;
@@ -3568,7 +3581,21 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
scrub_pause_on(fs_info);
ret = btrfs_inc_block_group_ro(root, cache);
scrub_pause_off(fs_info);
- if (ret) {
+
+ if (ret == 0) {
+ ro_set = 1;
+ } else if (ret == -ENOSPC) {
+ /*
+ * btrfs_inc_block_group_ro() returns -ENOSPC when it
+ * fails to create a new chunk for metadata. That is
+ * not a problem for scrub/replace, because metadata
+ * is always COWed, and our scrub pauses transaction
+ * commits.
+ */
+ ro_set = 0;
+ } else {
+ btrfs_warn(fs_info, "failed setting block group ro, ret=%d",
+ ret);
btrfs_put_block_group(cache);
break;
}
@@ -3577,7 +3604,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
dev_replace->cursor_left = found_key.offset;
dev_replace->item_needs_writeback = 1;
ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
- found_key.offset, is_dev_replace);
+ found_key.offset, cache, is_dev_replace);
/*
* flush, submit all pending read and write bios, afterwards
@@ -3611,7 +3638,30 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
scrub_pause_off(fs_info);
- btrfs_dec_block_group_ro(root, cache);
+ if (ro_set)
+ btrfs_dec_block_group_ro(root, cache);
+
+ /*
+ * We might have prevented the cleaner kthread from deleting
+ * this block group if it was already unused because we raced
+ * and set it to RO mode first. So add it back to the unused
+ * list, otherwise it might not ever be deleted unless a manual
+ * balance is triggered or it becomes used and unused again.
+ */
+ spin_lock(&cache->lock);
+ if (!cache->removed && !cache->ro && cache->reserved == 0 &&
+ btrfs_block_group_used(&cache->item) == 0) {
+ spin_unlock(&cache->lock);
+ spin_lock(&fs_info->unused_bgs_lock);
+ if (list_empty(&cache->bg_list)) {
+ btrfs_get_block_group(cache);
+ list_add_tail(&cache->bg_list,
+ &fs_info->unused_bgs);
+ }
+ spin_unlock(&fs_info->unused_bgs_lock);
+ } else {
+ spin_unlock(&cache->lock);
+ }
btrfs_put_block_group(cache);
if (ret)
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index c8c3d70c31ff..8b72b005bfb9 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -898,8 +898,10 @@ int btrfs_test_free_space_cache(void)
}
root = btrfs_alloc_dummy_root();
- if (!root)
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
goto out;
+ }
root->fs_info = btrfs_alloc_dummy_fs_info();
if (!root->fs_info)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 418c6a2ad7d8..be8eae80ff65 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -274,7 +274,6 @@ loop:
cur_trans->num_dirty_bgs = 0;
spin_lock_init(&cur_trans->dirty_bgs_lock);
INIT_LIST_HEAD(&cur_trans->deleted_bgs);
- spin_lock_init(&cur_trans->deleted_bgs_lock);
spin_lock_init(&cur_trans->dropped_roots_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(&cur_trans->dirty_pages,
@@ -592,6 +591,38 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
return start_transaction(root, num_items, TRANS_START,
BTRFS_RESERVE_FLUSH_ALL);
}
+struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
+ struct btrfs_root *root,
+ unsigned int num_items,
+ int min_factor)
+{
+ struct btrfs_trans_handle *trans;
+ u64 num_bytes;
+ int ret;
+
+ trans = btrfs_start_transaction(root, num_items);
+ if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
+ return trans;
+
+ trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans))
+ return trans;
+
+ num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
+ ret = btrfs_cond_migrate_bytes(root->fs_info,
+ &root->fs_info->trans_block_rsv,
+ num_bytes,
+ min_factor);
+ if (ret) {
+ btrfs_end_transaction(trans, root);
+ return ERR_PTR(ret);
+ }
+
+ trans->block_rsv = &root->fs_info->trans_block_rsv;
+ trans->bytes_reserved = num_bytes;
+
+ return trans;
+}
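The helper factors out a try-then-fallback reservation: attempt the normal flushing reservation first, and only on -ENOSPC start an unreserved transaction and conditionally migrate bytes from the global reserve. A toy model of the policy (the margin check is a stand-in for btrfs_cond_migrate_bytes()'s min_factor test; the numbers are illustrative):

#include <errno.h>
#include <stdio.h>

struct pool {
        unsigned long free;
        unsigned long global_rsv;
};

static int reserve(struct pool *p, unsigned long bytes, unsigned long margin)
{
        if (p->free >= bytes) {                 /* normal path */
                p->free -= bytes;
                return 0;
        }
        if (p->global_rsv >= bytes + margin) {  /* conditional fallback */
                p->global_rsv -= bytes;
                return 0;
        }
        return -ENOSPC;
}

int main(void)
{
        struct pool p = { .free = 0, .global_rsv = 1024 };

        printf("%d\n", reserve(&p, 256, 512));  /* 0: falls back to reserve */
        printf("%d\n", reserve(&p, 512, 512));  /* -ENOSPC: margin violated */
        return 0;
}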
struct btrfs_trans_handle *btrfs_start_transaction_lflush(
struct btrfs_root *root,
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index b05b2f64d913..64c8221b6165 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -77,8 +77,8 @@ struct btrfs_transaction {
*/
struct mutex cache_write_mutex;
spinlock_t dirty_bgs_lock;
+ /* Protected by spin lock fs_info->unused_bgs_lock. */
struct list_head deleted_bgs;
- spinlock_t deleted_bgs_lock;
spinlock_t dropped_roots_lock;
struct btrfs_delayed_ref_root delayed_refs;
int aborted;
@@ -185,6 +185,10 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
unsigned int num_items);
+struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
+ struct btrfs_root *root,
+ unsigned int num_items,
+ int min_factor);
struct btrfs_trans_handle *btrfs_start_transaction_lflush(
struct btrfs_root *root,
unsigned int num_items);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a6df8fdc1312..a23399e8e3ab 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1973,8 +1973,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
if (srcdev->writeable) {
fs_devices->rw_devices--;
/* zero out the old super if it is writable */
- btrfs_scratch_superblocks(srcdev->bdev,
- rcu_str_deref(srcdev->name));
+ btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
}
if (srcdev->bdev)
@@ -2024,8 +2023,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);
if (tgtdev->bdev) {
- btrfs_scratch_superblocks(tgtdev->bdev,
- rcu_str_deref(tgtdev->name));
+ btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
fs_info->fs_devices->open_devices--;
}
fs_info->fs_devices->num_devices--;
@@ -2853,7 +2851,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset)
if (ret)
return ret;
- trans = btrfs_start_transaction(root, 0);
+ trans = btrfs_start_trans_remove_block_group(root->fs_info,
+ chunk_offset);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
btrfs_std_error(root->fs_info, ret, NULL);
@@ -3123,7 +3122,7 @@ static int chunk_profiles_filter(u64 chunk_type,
return 1;
}
-static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
+static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
struct btrfs_balance_args *bargs)
{
struct btrfs_block_group_cache *cache;
@@ -3156,7 +3155,7 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
return ret;
}
-static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info,
+static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
u64 chunk_offset, struct btrfs_balance_args *bargs)
{
struct btrfs_block_group_cache *cache;
@@ -3549,12 +3548,11 @@ again:
ret = btrfs_force_chunk_alloc(trans, chunk_root,
BTRFS_BLOCK_GROUP_DATA);
+ btrfs_end_transaction(trans, chunk_root);
if (ret < 0) {
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
goto error;
}
-
- btrfs_end_transaction(trans, chunk_root);
chunk_reserved = 1;
}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index ec5712372732..d5c84f6b1353 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -382,7 +382,7 @@ struct map_lookup {
#define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5)
#define BTRFS_BALANCE_ARGS_LIMIT_RANGE (1ULL << 6)
#define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7)
-#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 8)
+#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 10)
#define BTRFS_BALANCE_ARGS_MASK \
(BTRFS_BALANCE_ARGS_PROFILES | \
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 1fcd7b6e7564..7cbef1a14fe1 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -351,137 +351,89 @@ err:
return ret;
}
-/*
- * List of handlers for synthetic system.* attributes. All real ondisk
- * attributes are handled directly.
- */
-const struct xattr_handler *btrfs_xattr_handlers[] = {
-#ifdef CONFIG_BTRFS_FS_POSIX_ACL
- &posix_acl_access_xattr_handler,
- &posix_acl_default_xattr_handler,
-#endif
- NULL,
-};
-
-/*
- * Check if the attribute is in a supported namespace.
- *
- * This is applied after the check for the synthetic attributes in the system
- * namespace.
- */
-static int btrfs_is_valid_xattr(const char *name)
+static int btrfs_xattr_handler_get(const struct xattr_handler *handler,
+ struct dentry *dentry, const char *name,
+ void *buffer, size_t size)
{
- int len = strlen(name);
- int prefixlen = 0;
-
- if (!strncmp(name, XATTR_SECURITY_PREFIX,
- XATTR_SECURITY_PREFIX_LEN))
- prefixlen = XATTR_SECURITY_PREFIX_LEN;
- else if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- prefixlen = XATTR_SYSTEM_PREFIX_LEN;
- else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
- prefixlen = XATTR_TRUSTED_PREFIX_LEN;
- else if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
- prefixlen = XATTR_USER_PREFIX_LEN;
- else if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
- prefixlen = XATTR_BTRFS_PREFIX_LEN;
- else
- return -EOPNOTSUPP;
-
- /*
- * The name cannot consist of just prefix
- */
- if (len <= prefixlen)
- return -EINVAL;
+ struct inode *inode = d_inode(dentry);
- return 0;
+ name = xattr_full_name(handler, name);
+ return __btrfs_getxattr(inode, name, buffer, size);
}
-ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size)
+static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
+ struct dentry *dentry, const char *name,
+ const void *buffer, size_t size,
+ int flags)
{
- int ret;
+ struct inode *inode = d_inode(dentry);
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_getxattr(dentry, name, buffer, size);
+ name = xattr_full_name(handler, name);
+ return __btrfs_setxattr(NULL, inode, name, buffer, size, flags);
+}
- ret = btrfs_is_valid_xattr(name);
- if (ret)
- return ret;
- return __btrfs_getxattr(d_inode(dentry), name, buffer, size);
+static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
+ struct dentry *dentry,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ name = xattr_full_name(handler, name);
+ return btrfs_set_prop(d_inode(dentry), name, value, size, flags);
}
+static const struct xattr_handler btrfs_security_xattr_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .get = btrfs_xattr_handler_get,
+ .set = btrfs_xattr_handler_set,
+};
+
+static const struct xattr_handler btrfs_trusted_xattr_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .get = btrfs_xattr_handler_get,
+ .set = btrfs_xattr_handler_set,
+};
+
+static const struct xattr_handler btrfs_user_xattr_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = btrfs_xattr_handler_get,
+ .set = btrfs_xattr_handler_set,
+};
+
+static const struct xattr_handler btrfs_btrfs_xattr_handler = {
+ .prefix = XATTR_BTRFS_PREFIX,
+ .get = btrfs_xattr_handler_get,
+ .set = btrfs_xattr_handler_set_prop,
+};
+
+const struct xattr_handler *btrfs_xattr_handlers[] = {
+ &btrfs_security_xattr_handler,
+#ifdef CONFIG_BTRFS_FS_POSIX_ACL
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+#endif
+ &btrfs_trusted_xattr_handler,
+ &btrfs_user_xattr_handler,
+ &btrfs_btrfs_xattr_handler,
+ NULL,
+};
+
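With a complete per-namespace handler table, the generic xattr code can do all prefix matching and reject unknown namespaces by itself, which is what makes btrfs_is_valid_xattr() above removable. A sketch of the dispatch the table enables (toy handler signature; the real handlers also take dentry/buffer arguments):

#include <stdio.h>
#include <string.h>

struct xattr_handler {
        const char *prefix;
        int (*get)(const char *suffix);
};

static int user_get(const char *suffix)
{
        printf("user xattr: %s\n", suffix);
        return 0;
}

static const struct xattr_handler user_handler = { "user.", user_get };
static const struct xattr_handler *handlers[] = { &user_handler, NULL };

/* Walk the table, match a prefix, and call the handler with the suffix;
 * a bare prefix and unknown namespaces both fail automatically. */
static int dispatch_get(const char *name)
{
        for (const struct xattr_handler **h = handlers; *h; h++) {
                size_t n = strlen((*h)->prefix);

                if (strncmp(name, (*h)->prefix, n) == 0 && name[n] != '\0')
                        return (*h)->get(name + n);
        }
        return -1;      /* -EOPNOTSUPP: no handler for this namespace */
}

int main(void)
{
        dispatch_get("user.mime_type");          /* dispatched */
        printf("%d\n", dispatch_get("bogus.x")); /* -1 */
        return 0;
}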
int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags)
{
struct btrfs_root *root = BTRFS_I(d_inode(dentry))->root;
- int ret;
- /*
- * The permission on security.* and system.* is not checked
- * in permission().
- */
if (btrfs_root_readonly(root))
return -EROFS;
-
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_setxattr(dentry, name, value, size, flags);
-
- ret = btrfs_is_valid_xattr(name);
- if (ret)
- return ret;
-
- if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
- return btrfs_set_prop(d_inode(dentry), name,
- value, size, flags);
-
- if (size == 0)
- value = ""; /* empty EA, do not remove */
-
- return __btrfs_setxattr(NULL, d_inode(dentry), name, value, size,
- flags);
+ return generic_setxattr(dentry, name, value, size, flags);
}
int btrfs_removexattr(struct dentry *dentry, const char *name)
{
struct btrfs_root *root = BTRFS_I(d_inode(dentry))->root;
- int ret;
- /*
- * The permission on security.* and system.* is not checked
- * in permission().
- */
if (btrfs_root_readonly(root))
return -EROFS;
-
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_removexattr(dentry, name);
-
- ret = btrfs_is_valid_xattr(name);
- if (ret)
- return ret;
-
- if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
- return btrfs_set_prop(d_inode(dentry), name,
- NULL, 0, XATTR_REPLACE);
-
- return __btrfs_setxattr(NULL, d_inode(dentry), name, NULL, 0,
- XATTR_REPLACE);
+ return generic_removexattr(dentry, name);
}
static int btrfs_initxattrs(struct inode *inode,
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 5049608d1388..96807b3d22f5 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -28,8 +28,6 @@ extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
struct inode *inode, const char *name,
const void *value, size_t size, int flags);
-extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size);
extern int btrfs_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
extern int btrfs_removexattr(struct dentry *dentry, const char *name);
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 7a6b02f72787..c0f3da3926a0 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -879,7 +879,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
loff_t pos, eof;
size_t len;
void *data;
- int ret;
+ int ret = -ENOBUFS;
ASSERT(op != NULL);
ASSERT(page != NULL);
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 8f84646f10e9..f19708487e2f 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -49,10 +49,10 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type)
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
BUG();
@@ -92,7 +92,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
ret = posix_acl_equiv_mode(acl, &new_mode);
if (ret < 0)
@@ -106,7 +106,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
ret = acl ? -EINVAL : 0;
goto out;
}
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
ret = -EINVAL;
@@ -202,11 +202,11 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
ceph_pagelist_encode_32(pagelist, acl && default_acl ? 2 : 1);
if (acl) {
- size_t len = strlen(POSIX_ACL_XATTR_ACCESS);
+ size_t len = strlen(XATTR_NAME_POSIX_ACL_ACCESS);
err = ceph_pagelist_reserve(pagelist, len + val_size1 + 8);
if (err)
goto out_err;
- ceph_pagelist_encode_string(pagelist, POSIX_ACL_XATTR_ACCESS,
+ ceph_pagelist_encode_string(pagelist, XATTR_NAME_POSIX_ACL_ACCESS,
len);
err = posix_acl_to_xattr(&init_user_ns, acl,
tmp_buf, val_size1);
@@ -216,12 +216,12 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
ceph_pagelist_append(pagelist, tmp_buf, val_size1);
}
if (default_acl) {
- size_t len = strlen(POSIX_ACL_XATTR_DEFAULT);
+ size_t len = strlen(XATTR_NAME_POSIX_ACL_DEFAULT);
err = ceph_pagelist_reserve(pagelist, len + val_size2 + 8);
if (err)
goto out_err;
err = ceph_pagelist_encode_string(pagelist,
- POSIX_ACL_XATTR_DEFAULT, len);
+ XATTR_NAME_POSIX_ACL_DEFAULT, len);
err = posix_acl_to_xattr(&init_user_ns, default_acl,
tmp_buf, val_size2);
if (err < 0)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 498dcfa2dcdb..da55eb8bcffa 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1756,7 +1756,7 @@ retry:
*/
static const struct inode_operations ceph_symlink_iops = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = ceph_setattr,
.getattr = ceph_getattr,
.setxattr = ceph_setxattr,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index cbc0f4bca0c0..90e4e2b398b6 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -900,8 +900,7 @@ const struct inode_operations cifs_file_inode_ops = {
const struct inode_operations cifs_symlink_inode_ops = {
.readlink = generic_readlink,
- .follow_link = cifs_follow_link,
- .put_link = kfree_put_link,
+ .get_link = cifs_get_link,
.permission = cifs_permission,
/* BB add the following two eventually */
/* revalidate: cifs_revalidate,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index c3cc1609025f..26a1187d4323 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -120,9 +120,8 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
#endif
/* Functions related to symlinks */
-extern const char *cifs_follow_link(struct dentry *direntry, void **cookie);
-extern int cifs_readlink(struct dentry *direntry, char __user *buffer,
- int buflen);
+extern const char *cifs_get_link(struct dentry *, struct inode *,
+ struct delayed_call *);
extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
const char *symname);
extern int cifs_removexattr(struct dentry *, const char *);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 6b66dd5d1540..a329f5ba35aa 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1831,11 +1831,11 @@ cifs_invalidate_mapping(struct inode *inode)
* @word: long word containing the bit lock
*/
static int
-cifs_wait_bit_killable(struct wait_bit_key *key)
+cifs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
- if (fatal_signal_pending(current))
- return -ERESTARTSYS;
freezable_schedule_unsafe();
+ if (signal_pending_state(mode, current))
+ return -ERESTARTSYS;
return 0;
}
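
Wait-bit action callbacks now receive the sleep mode, so the signal check can match it via signal_pending_state(); note that the callback sleeps first and reports the signal afterwards. A sketch of the resulting shape (a real callback would pick its own schedule variant, as cifs does with freezable_schedule_unsafe()):

/* Shape of a wait-bit action callback after the signature change. */
static int sketch_wait_bit_killable(struct wait_bit_key *key, int mode)
{
	schedule();
	if (signal_pending_state(mode, current))
		return -ERESTARTSYS;
	return 0;
}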
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index e3548f73bdea..062c2375549a 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -627,9 +627,9 @@ cifs_hl_exit:
}
const char *
-cifs_follow_link(struct dentry *direntry, void **cookie)
+cifs_get_link(struct dentry *direntry, struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(direntry);
int rc = -ENOMEM;
unsigned int xid;
char *full_path = NULL;
@@ -639,6 +639,9 @@ cifs_follow_link(struct dentry *direntry, void **cookie)
struct cifs_tcon *tcon;
struct TCP_Server_Info *server;
+ if (!direntry)
+ return ERR_PTR(-ECHILD);
+
xid = get_xid();
tlink = cifs_sb_tlink(cifs_sb);
@@ -678,7 +681,8 @@ cifs_follow_link(struct dentry *direntry, void **cookie)
kfree(target_path);
return ERR_PTR(rc);
}
- return *cookie = target_path;
+ set_delayed_call(done, kfree_link, target_path);
+ return target_path;
}
int
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index ff9e1f8b16a4..f5dc2f0df4ad 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -190,8 +190,8 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
#endif /* CONFIG_CIFS_ACL */
} else {
int temp;
- temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
- strlen(POSIX_ACL_XATTR_ACCESS));
+ temp = strncmp(ea_name, XATTR_NAME_POSIX_ACL_ACCESS,
+ strlen(XATTR_NAME_POSIX_ACL_ACCESS));
if (temp == 0) {
#ifdef CONFIG_CIFS_POSIX
if (sb->s_flags & MS_POSIXACL)
@@ -203,8 +203,8 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
#else
cifs_dbg(FYI, "set POSIX ACL not supported\n");
#endif
- } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
- strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
+ } else if (strncmp(ea_name, XATTR_NAME_POSIX_ACL_DEFAULT,
+ strlen(XATTR_NAME_POSIX_ACL_DEFAULT)) == 0) {
#ifdef CONFIG_CIFS_POSIX
if (sb->s_flags & MS_POSIXACL)
rc = CIFSSMBSetPosixACL(xid, pTcon, full_path,
@@ -292,8 +292,8 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
full_path, ea_name, ea_value, buf_size,
cifs_sb->local_nls, cifs_remap(cifs_sb));
- } else if (strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
- strlen(POSIX_ACL_XATTR_ACCESS)) == 0) {
+ } else if (strncmp(ea_name, XATTR_NAME_POSIX_ACL_ACCESS,
+ strlen(XATTR_NAME_POSIX_ACL_ACCESS)) == 0) {
#ifdef CONFIG_CIFS_POSIX
if (sb->s_flags & MS_POSIXACL)
rc = CIFSSMBGetPosixACL(xid, pTcon, full_path,
@@ -303,8 +303,8 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
#else
cifs_dbg(FYI, "Query POSIX ACL not supported yet\n");
#endif /* CONFIG_CIFS_POSIX */
- } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
- strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
+ } else if (strncmp(ea_name, XATTR_NAME_POSIX_ACL_DEFAULT,
+ strlen(XATTR_NAME_POSIX_ACL_DEFAULT)) == 0) {
#ifdef CONFIG_CIFS_POSIX
if (sb->s_flags & MS_POSIXACL)
rc = CIFSSMBGetPosixACL(xid, pTcon, full_path,
diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c
index 7740b1c871c1..1bfb7ba4e85e 100644
--- a/fs/coda/cnode.c
+++ b/fs/coda/cnode.c
@@ -8,6 +8,7 @@
#include <linux/coda.h>
#include <linux/coda_psdev.h>
+#include <linux/pagemap.h>
#include "coda_linux.h"
static inline int coda_fideq(struct CodaFid *fid1, struct CodaFid *fid2)
@@ -17,8 +18,7 @@ static inline int coda_fideq(struct CodaFid *fid1, struct CodaFid *fid2)
static const struct inode_operations coda_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = coda_setattr,
};
@@ -35,6 +35,7 @@ static void coda_fill_inode(struct inode *inode, struct coda_vattr *attr)
inode->i_fop = &coda_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &coda_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &coda_symlink_aops;
inode->i_mapping = &inode->i_data;
} else
diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c
index ab94ef63caef..03736e20d720 100644
--- a/fs/coda/symlink.c
+++ b/fs/coda/symlink.c
@@ -26,7 +26,7 @@ static int coda_symlink_filler(struct file *file, struct page *page)
int error;
struct coda_inode_info *cii;
unsigned int len = PAGE_SIZE;
- char *p = kmap(page);
+ char *p = page_address(page);
cii = ITOC(inode);
@@ -34,13 +34,11 @@ static int coda_symlink_filler(struct file *file, struct page *page)
if (error)
goto fail;
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
fail:
SetPageError(page);
- kunmap(page);
unlock_page(page);
return error;
}
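
The kmap()/kunmap() pairs can go because coda_fill_inode() now marks symlink inodes with inode_nohighmem(): their pagecache pages are allocated from lowmem, so page_address() is always valid. A sketch of a filler under that assumption, where example_copy_link() is a hypothetical helper that fills the buffer with the target path:

/* Minimal symlink readpage filler once the inode is marked with
 * inode_nohighmem(). */
static int example_copy_link(struct inode *inode, char *buf, size_t len);

static int example_symlink_filler(struct file *file, struct page *page)
{
	char *p = page_address(page);	/* lowmem page, no kmap() needed */
	int err = example_copy_link(page->mapping->host, p, PAGE_SIZE);

	if (err) {
		SetPageError(page);
		unlock_page(page);
		return err;
	}
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}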
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index dcf26537c935..9144b779d10e 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -58,6 +58,8 @@
#include <linux/atalk.h>
#include <linux/gfp.h>
+#include "internal.h"
+
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_sock.h>
#include <net/bluetooth/rfcomm.h>
@@ -115,19 +117,38 @@
#include <asm/fbio.h>
#endif
-static int w_long(unsigned int fd, unsigned int cmd,
- compat_ulong_t __user *argp)
+#define convert_in_user(srcptr, dstptr) \
+({ \
+ typeof(*srcptr) val; \
+ \
+ get_user(val, srcptr) || put_user(val, dstptr); \
+})
+
+static int do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ int err;
+
+ err = security_file_ioctl(file, cmd, arg);
+ if (err)
+ return err;
+
+ return vfs_ioctl(file, cmd, arg);
+}
+
+static int w_long(struct file *file,
+ unsigned int cmd, compat_ulong_t __user *argp)
{
- mm_segment_t old_fs = get_fs();
int err;
- unsigned long val;
+ unsigned long __user *valp = compat_alloc_user_space(sizeof(*valp));
- set_fs (KERNEL_DS);
- err = sys_ioctl(fd, cmd, (unsigned long)&val);
- set_fs (old_fs);
- if (!err && put_user(val, argp))
+ if (valp == NULL)
return -EFAULT;
- return err;
+ err = do_ioctl(file, cmd, (unsigned long)valp);
+ if (err)
+ return err;
+ if (convert_in_user(valp, argp))
+ return -EFAULT;
+ return 0;
}
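
Every converted helper below follows the same shape: stage a native-layout object in user-addressable scratch space with compat_alloc_user_space(), run the ioctl on it through do_ioctl(), then move fields into the compat layout with convert_in_user() instead of juggling set_fs(KERNEL_DS). A condensed sketch of the pattern, with a hypothetical struct foo/compat_foo pair standing in for the real ioctl argument types:

/* Sketch of the common conversion pattern; the structs and field
 * names are hypothetical stand-ins. */
struct foo { unsigned long field; };
struct compat_foo { compat_ulong_t field; };

static int compat_foo_ioctl(struct file *file, unsigned int cmd,
			    struct compat_foo __user *argp)
{
	struct foo __user *p = compat_alloc_user_space(sizeof(*p));
	int err;

	if (p == NULL)
		return -EFAULT;
	err = do_ioctl(file, cmd, (unsigned long)p);
	if (err)
		return err;
	/* convert_in_user() is a get_user()+put_user() pair */
	if (convert_in_user(&p->field, &argp->field))
		return -EFAULT;
	return 0;
}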
struct compat_video_event {
@@ -139,23 +160,23 @@ struct compat_video_event {
} u;
};
-static int do_video_get_event(unsigned int fd, unsigned int cmd,
- struct compat_video_event __user *up)
+static int do_video_get_event(struct file *file,
+ unsigned int cmd, struct compat_video_event __user *up)
{
- struct video_event kevent;
- mm_segment_t old_fs = get_fs();
+ struct video_event __user *kevent =
+ compat_alloc_user_space(sizeof(*kevent));
int err;
- set_fs(KERNEL_DS);
- err = sys_ioctl(fd, cmd, (unsigned long) &kevent);
- set_fs(old_fs);
+ if (kevent == NULL)
+ return -EFAULT;
+ err = do_ioctl(file, cmd, (unsigned long)kevent);
if (!err) {
- err = put_user(kevent.type, &up->type);
- err |= put_user(kevent.timestamp, &up->timestamp);
- err |= put_user(kevent.u.size.w, &up->u.size.w);
- err |= put_user(kevent.u.size.h, &up->u.size.h);
- err |= put_user(kevent.u.size.aspect_ratio,
+ err = convert_in_user(&kevent->type, &up->type);
+ err |= convert_in_user(&kevent->timestamp, &up->timestamp);
+ err |= convert_in_user(&kevent->u.size.w, &up->u.size.w);
+ err |= convert_in_user(&kevent->u.size.h, &up->u.size.h);
+ err |= convert_in_user(&kevent->u.size.aspect_ratio,
&up->u.size.aspect_ratio);
if (err)
err = -EFAULT;
@@ -169,8 +190,8 @@ struct compat_video_still_picture {
int32_t size;
};
-static int do_video_stillpicture(unsigned int fd, unsigned int cmd,
- struct compat_video_still_picture __user *up)
+static int do_video_stillpicture(struct file *file,
+ unsigned int cmd, struct compat_video_still_picture __user *up)
{
struct video_still_picture __user *up_native;
compat_uptr_t fp;
@@ -190,7 +211,7 @@ static int do_video_stillpicture(unsigned int fd, unsigned int cmd,
if (err)
return -EFAULT;
- err = sys_ioctl(fd, cmd, (unsigned long) up_native);
+ err = do_ioctl(file, cmd, (unsigned long) up_native);
return err;
}
@@ -200,8 +221,8 @@ struct compat_video_spu_palette {
compat_uptr_t palette;
};
-static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd,
- struct compat_video_spu_palette __user *up)
+static int do_video_set_spu_palette(struct file *file,
+ unsigned int cmd, struct compat_video_spu_palette __user *up)
{
struct video_spu_palette __user *up_native;
compat_uptr_t palp;
@@ -218,7 +239,7 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd,
if (err)
return -EFAULT;
- err = sys_ioctl(fd, cmd, (unsigned long) up_native);
+ err = do_ioctl(file, cmd, (unsigned long) up_native);
return err;
}
@@ -276,7 +297,7 @@ static int sg_build_iovec(sg_io_hdr_t __user *sgio, void __user *dxferp, u16 iov
return 0;
}
-static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
+static int sg_ioctl_trans(struct file *file, unsigned int cmd,
sg_io_hdr32_t __user *sgio32)
{
sg_io_hdr_t __user *sgio;
@@ -289,7 +310,7 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
if (get_user(interface_id, &sgio32->interface_id))
return -EFAULT;
if (interface_id != 'S')
- return sys_ioctl(fd, cmd, (unsigned long)sgio32);
+ return do_ioctl(file, cmd, (unsigned long)sgio32);
if (get_user(iovec_count, &sgio32->iovec_count))
return -EFAULT;
@@ -349,7 +370,7 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
if (put_user(compat_ptr(data), &sgio->usr_ptr))
return -EFAULT;
- err = sys_ioctl(fd, cmd, (unsigned long) sgio);
+ err = do_ioctl(file, cmd, (unsigned long) sgio);
if (err >= 0) {
void __user *datap;
@@ -380,13 +401,13 @@ struct compat_sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
int unused;
};
-static int sg_grt_trans(unsigned int fd, unsigned int cmd, struct
- compat_sg_req_info __user *o)
+static int sg_grt_trans(struct file *file,
+ unsigned int cmd, struct compat_sg_req_info __user *o)
{
int err, i;
sg_req_info_t __user *r;
r = compat_alloc_user_space(sizeof(sg_req_info_t)*SG_MAX_QUEUE);
- err = sys_ioctl(fd,cmd,(unsigned long)r);
+ err = do_ioctl(file, cmd, (unsigned long)r);
if (err < 0)
return err;
for (i = 0; i < SG_MAX_QUEUE; i++) {
@@ -412,8 +433,8 @@ struct sock_fprog32 {
#define PPPIOCSPASS32 _IOW('t', 71, struct sock_fprog32)
#define PPPIOCSACTIVE32 _IOW('t', 70, struct sock_fprog32)
-static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd,
- struct sock_fprog32 __user *u_fprog32)
+static int ppp_sock_fprog_ioctl_trans(struct file *file,
+ unsigned int cmd, struct sock_fprog32 __user *u_fprog32)
{
struct sock_fprog __user *u_fprog64 = compat_alloc_user_space(sizeof(struct sock_fprog));
void __user *fptr64;
@@ -435,7 +456,7 @@ static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd,
else
cmd = PPPIOCSACTIVE;
- return sys_ioctl(fd, cmd, (unsigned long) u_fprog64);
+ return do_ioctl(file, cmd, (unsigned long) u_fprog64);
}
struct ppp_option_data32 {
@@ -451,7 +472,7 @@ struct ppp_idle32 {
};
#define PPPIOCGIDLE32 _IOR('t', 63, struct ppp_idle32)
-static int ppp_gidle(unsigned int fd, unsigned int cmd,
+static int ppp_gidle(struct file *file, unsigned int cmd,
struct ppp_idle32 __user *idle32)
{
struct ppp_idle __user *idle;
@@ -460,7 +481,7 @@ static int ppp_gidle(unsigned int fd, unsigned int cmd,
idle = compat_alloc_user_space(sizeof(*idle));
- err = sys_ioctl(fd, PPPIOCGIDLE, (unsigned long) idle);
+ err = do_ioctl(file, PPPIOCGIDLE, (unsigned long) idle);
if (!err) {
if (get_user(xmit, &idle->xmit_idle) ||
@@ -472,7 +493,7 @@ static int ppp_gidle(unsigned int fd, unsigned int cmd,
return err;
}
-static int ppp_scompress(unsigned int fd, unsigned int cmd,
+static int ppp_scompress(struct file *file, unsigned int cmd,
struct ppp_option_data32 __user *odata32)
{
struct ppp_option_data __user *odata;
@@ -492,7 +513,7 @@ static int ppp_scompress(unsigned int fd, unsigned int cmd,
sizeof(__u32) + sizeof(int)))
return -EFAULT;
- return sys_ioctl(fd, PPPIOCSCOMPRESS, (unsigned long) odata);
+ return do_ioctl(file, PPPIOCSCOMPRESS, (unsigned long) odata);
}
#ifdef CONFIG_BLOCK
@@ -512,12 +533,13 @@ struct mtpos32 {
};
#define MTIOCPOS32 _IOR('m', 3, struct mtpos32)
-static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
+static int mt_ioctl_trans(struct file *file,
+ unsigned int cmd, void __user *argp)
{
- mm_segment_t old_fs = get_fs();
- struct mtget get;
+ /* NULL initialization to make gcc shut up */
+ struct mtget __user *get = NULL;
struct mtget32 __user *umget32;
- struct mtpos pos;
+ struct mtpos __user *pos = NULL;
struct mtpos32 __user *upos32;
unsigned long kcmd;
void *karg;
@@ -526,32 +548,34 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
switch(cmd) {
case MTIOCPOS32:
kcmd = MTIOCPOS;
- karg = &pos;
+ pos = compat_alloc_user_space(sizeof(*pos));
+ karg = pos;
break;
default: /* MTIOCGET32 */
kcmd = MTIOCGET;
- karg = &get;
+ get = compat_alloc_user_space(sizeof(*get));
+ karg = get;
break;
}
- set_fs (KERNEL_DS);
- err = sys_ioctl (fd, kcmd, (unsigned long)karg);
- set_fs (old_fs);
+ if (karg == NULL)
+ return -EFAULT;
+ err = do_ioctl(file, kcmd, (unsigned long)karg);
if (err)
return err;
switch (cmd) {
case MTIOCPOS32:
upos32 = argp;
- err = __put_user(pos.mt_blkno, &upos32->mt_blkno);
+ err = convert_in_user(&pos->mt_blkno, &upos32->mt_blkno);
break;
case MTIOCGET32:
umget32 = argp;
- err = __put_user(get.mt_type, &umget32->mt_type);
- err |= __put_user(get.mt_resid, &umget32->mt_resid);
- err |= __put_user(get.mt_dsreg, &umget32->mt_dsreg);
- err |= __put_user(get.mt_gstat, &umget32->mt_gstat);
- err |= __put_user(get.mt_erreg, &umget32->mt_erreg);
- err |= __put_user(get.mt_fileno, &umget32->mt_fileno);
- err |= __put_user(get.mt_blkno, &umget32->mt_blkno);
+ err = convert_in_user(&get->mt_type, &umget32->mt_type);
+ err |= convert_in_user(&get->mt_resid, &umget32->mt_resid);
+ err |= convert_in_user(&get->mt_dsreg, &umget32->mt_dsreg);
+ err |= convert_in_user(&get->mt_gstat, &umget32->mt_gstat);
+ err |= convert_in_user(&get->mt_erreg, &umget32->mt_erreg);
+ err |= convert_in_user(&get->mt_fileno, &umget32->mt_fileno);
+ err |= convert_in_user(&get->mt_blkno, &umget32->mt_blkno);
break;
}
return err ? -EFAULT: 0;
@@ -605,42 +629,41 @@ struct serial_struct32 {
compat_int_t reserved[1];
};
-static int serial_struct_ioctl(unsigned fd, unsigned cmd,
- struct serial_struct32 __user *ss32)
+static int serial_struct_ioctl(struct file *file,
+ unsigned cmd, struct serial_struct32 __user *ss32)
{
typedef struct serial_struct32 SS32;
int err;
- struct serial_struct ss;
- mm_segment_t oldseg = get_fs();
+ struct serial_struct __user *ss = compat_alloc_user_space(sizeof(*ss));
__u32 udata;
unsigned int base;
+ unsigned char *iomem_base;
+ if (ss == NULL)
+ return -EFAULT;
if (cmd == TIOCSSERIAL) {
- if (!access_ok(VERIFY_READ, ss32, sizeof(SS32)))
- return -EFAULT;
- if (__copy_from_user(&ss, ss32, offsetof(SS32, iomem_base)))
+ if (copy_in_user(ss, ss32, offsetof(SS32, iomem_base)) ||
+ get_user(udata, &ss32->iomem_base))
return -EFAULT;
- if (__get_user(udata, &ss32->iomem_base))
+ iomem_base = compat_ptr(udata);
+ if (put_user(iomem_base, &ss->iomem_base) ||
+ convert_in_user(&ss32->iomem_reg_shift,
+ &ss->iomem_reg_shift) ||
+ convert_in_user(&ss32->port_high, &ss->port_high) ||
+ put_user(0UL, &ss->iomap_base))
return -EFAULT;
- ss.iomem_base = compat_ptr(udata);
- if (__get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) ||
- __get_user(ss.port_high, &ss32->port_high))
- return -EFAULT;
- ss.iomap_base = 0UL;
}
- set_fs(KERNEL_DS);
- err = sys_ioctl(fd,cmd,(unsigned long)(&ss));
- set_fs(oldseg);
+ err = do_ioctl(file, cmd, (unsigned long)ss);
if (cmd == TIOCGSERIAL && err >= 0) {
- if (!access_ok(VERIFY_WRITE, ss32, sizeof(SS32)))
- return -EFAULT;
- if (__copy_to_user(ss32,&ss,offsetof(SS32,iomem_base)))
+ if (copy_in_user(ss32, ss, offsetof(SS32, iomem_base)) ||
+ get_user(iomem_base, &ss->iomem_base))
return -EFAULT;
- base = (unsigned long)ss.iomem_base >> 32 ?
- 0xffffffff : (unsigned)(unsigned long)ss.iomem_base;
- if (__put_user(base, &ss32->iomem_base) ||
- __put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) ||
- __put_user(ss.port_high, &ss32->port_high))
+ base = (unsigned long)iomem_base >> 32 ?
+ 0xffffffff : (unsigned)(unsigned long)iomem_base;
+ if (put_user(base, &ss32->iomem_base) ||
+ convert_in_user(&ss->iomem_reg_shift,
+ &ss32->iomem_reg_shift) ||
+ convert_in_user(&ss->port_high, &ss32->port_high))
return -EFAULT;
}
return err;
@@ -674,8 +697,8 @@ struct i2c_rdwr_aligned {
struct i2c_msg msgs[0];
};
-static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd,
- struct i2c_rdwr_ioctl_data32 __user *udata)
+static int do_i2c_rdwr_ioctl(struct file *file,
+ unsigned int cmd, struct i2c_rdwr_ioctl_data32 __user *udata)
{
struct i2c_rdwr_aligned __user *tdata;
struct i2c_msg __user *tmsgs;
@@ -708,11 +731,11 @@ static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd,
put_user(compat_ptr(datap), &tmsgs[i].buf))
return -EFAULT;
}
- return sys_ioctl(fd, cmd, (unsigned long)tdata);
+ return do_ioctl(file, cmd, (unsigned long)tdata);
}
-static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd,
- struct i2c_smbus_ioctl_data32 __user *udata)
+static int do_i2c_smbus_ioctl(struct file *file,
+ unsigned int cmd, struct i2c_smbus_ioctl_data32 __user *udata)
{
struct i2c_smbus_ioctl_data __user *tdata;
compat_caddr_t datap;
@@ -734,7 +757,7 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd,
__put_user(compat_ptr(datap), &tdata->data))
return -EFAULT;
- return sys_ioctl(fd, cmd, (unsigned long)tdata);
+ return do_ioctl(file, cmd, (unsigned long)tdata);
}
#define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t)
@@ -742,29 +765,27 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd,
#define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t)
#define RTC_EPOCH_SET32 _IOW('p', 0x0e, compat_ulong_t)
-static int rtc_ioctl(unsigned fd, unsigned cmd, void __user *argp)
+static int rtc_ioctl(struct file *file,
+ unsigned cmd, void __user *argp)
{
- mm_segment_t oldfs = get_fs();
- compat_ulong_t val32;
- unsigned long kval;
+ unsigned long __user *valp = compat_alloc_user_space(sizeof(*valp));
int ret;
+ if (valp == NULL)
+ return -EFAULT;
switch (cmd) {
case RTC_IRQP_READ32:
case RTC_EPOCH_READ32:
- set_fs(KERNEL_DS);
- ret = sys_ioctl(fd, (cmd == RTC_IRQP_READ32) ?
+ ret = do_ioctl(file, (cmd == RTC_IRQP_READ32) ?
RTC_IRQP_READ : RTC_EPOCH_READ,
- (unsigned long)&kval);
- set_fs(oldfs);
+ (unsigned long)valp);
if (ret)
return ret;
- val32 = kval;
- return put_user(val32, (unsigned int __user *)argp);
+ return convert_in_user(valp, (unsigned int __user *)argp);
case RTC_IRQP_SET32:
- return sys_ioctl(fd, RTC_IRQP_SET, (unsigned long)argp);
+ return do_ioctl(file, RTC_IRQP_SET, (unsigned long)argp);
case RTC_EPOCH_SET32:
- return sys_ioctl(fd, RTC_EPOCH_SET, (unsigned long)argp);
+ return do_ioctl(file, RTC_EPOCH_SET, (unsigned long)argp);
}
return -ENOIOCTLCMD;
@@ -1436,53 +1457,53 @@ IGNORE_IOCTL(FBIOGCURSOR32)
 * a compat_ioctl operation in the place that handles the
* ioctl for the native case.
*/
-static long do_ioctl_trans(int fd, unsigned int cmd,
+static long do_ioctl_trans(unsigned int cmd,
unsigned long arg, struct file *file)
{
void __user *argp = compat_ptr(arg);
switch (cmd) {
case PPPIOCGIDLE32:
- return ppp_gidle(fd, cmd, argp);
+ return ppp_gidle(file, cmd, argp);
case PPPIOCSCOMPRESS32:
- return ppp_scompress(fd, cmd, argp);
+ return ppp_scompress(file, cmd, argp);
case PPPIOCSPASS32:
case PPPIOCSACTIVE32:
- return ppp_sock_fprog_ioctl_trans(fd, cmd, argp);
+ return ppp_sock_fprog_ioctl_trans(file, cmd, argp);
#ifdef CONFIG_BLOCK
case SG_IO:
- return sg_ioctl_trans(fd, cmd, argp);
+ return sg_ioctl_trans(file, cmd, argp);
case SG_GET_REQUEST_TABLE:
- return sg_grt_trans(fd, cmd, argp);
+ return sg_grt_trans(file, cmd, argp);
case MTIOCGET32:
case MTIOCPOS32:
- return mt_ioctl_trans(fd, cmd, argp);
+ return mt_ioctl_trans(file, cmd, argp);
#endif
/* Serial */
case TIOCGSERIAL:
case TIOCSSERIAL:
- return serial_struct_ioctl(fd, cmd, argp);
+ return serial_struct_ioctl(file, cmd, argp);
/* i2c */
case I2C_FUNCS:
- return w_long(fd, cmd, argp);
+ return w_long(file, cmd, argp);
case I2C_RDWR:
- return do_i2c_rdwr_ioctl(fd, cmd, argp);
+ return do_i2c_rdwr_ioctl(file, cmd, argp);
case I2C_SMBUS:
- return do_i2c_smbus_ioctl(fd, cmd, argp);
+ return do_i2c_smbus_ioctl(file, cmd, argp);
/* Not implemented in the native kernel */
case RTC_IRQP_READ32:
case RTC_IRQP_SET32:
case RTC_EPOCH_READ32:
case RTC_EPOCH_SET32:
- return rtc_ioctl(fd, cmd, argp);
+ return rtc_ioctl(file, cmd, argp);
/* dvb */
case VIDEO_GET_EVENT:
- return do_video_get_event(fd, cmd, argp);
+ return do_video_get_event(file, cmd, argp);
case VIDEO_STILLPICTURE:
- return do_video_stillpicture(fd, cmd, argp);
+ return do_video_stillpicture(file, cmd, argp);
case VIDEO_SET_SPU_PALETTE:
- return do_video_set_spu_palette(fd, cmd, argp);
+ return do_video_set_spu_palette(file, cmd, argp);
}
/*
@@ -1513,7 +1534,7 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
case NBD_SET_BLKSIZE:
case NBD_SET_SIZE:
case NBD_SET_SIZE_BLOCKS:
- return do_vfs_ioctl(file, fd, cmd, arg);
+ return vfs_ioctl(file, cmd, arg);
}
return -ENOIOCTLCMD;
@@ -1602,7 +1623,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
if (compat_ioctl_check_table(XFORM(cmd)))
goto found_handler;
- error = do_ioctl_trans(fd, cmd, arg, f.file);
+ error = do_ioctl_trans(cmd, arg, f.file);
if (error == -ENOIOCTLCMD)
error = -ENOTTY;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index c81ce7f200a6..a7a1b218f308 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1636,6 +1636,116 @@ const struct file_operations configfs_dir_operations = {
.iterate = configfs_readdir,
};
+/**
+ * configfs_register_group - creates a parent-child relation between two groups
+ * @parent_group: parent group
+ * @group: child group
+ *
+ * Links the groups, creates a dentry for the child and attaches it to
+ * the parent's dentry.
+ *
+ * Return: 0 on success, negative errno code on error
+ */
+int configfs_register_group(struct config_group *parent_group,
+ struct config_group *group)
+{
+ struct configfs_subsystem *subsys = parent_group->cg_subsys;
+ struct dentry *parent;
+ int ret;
+
+ mutex_lock(&subsys->su_mutex);
+ link_group(parent_group, group);
+ mutex_unlock(&subsys->su_mutex);
+
+ parent = parent_group->cg_item.ci_dentry;
+
+ mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+ ret = create_default_group(parent_group, group);
+ if (!ret) {
+ spin_lock(&configfs_dirent_lock);
+ configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
+ spin_unlock(&configfs_dirent_lock);
+ }
+ mutex_unlock(&d_inode(parent)->i_mutex);
+ return ret;
+}
+EXPORT_SYMBOL(configfs_register_group);
+
+/**
+ * configfs_unregister_group() - unregisters a child group from its parent
+ * @group: child group to be unregistered
+ *
+ * Undoes configfs_register_group()
+ */
+void configfs_unregister_group(struct config_group *group)
+{
+ struct configfs_subsystem *subsys = group->cg_subsys;
+ struct dentry *dentry = group->cg_item.ci_dentry;
+ struct dentry *parent = group->cg_item.ci_parent->ci_dentry;
+
+ mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+ spin_lock(&configfs_dirent_lock);
+ configfs_detach_prep(dentry, NULL);
+ spin_unlock(&configfs_dirent_lock);
+
+ configfs_detach_group(&group->cg_item);
+ d_inode(dentry)->i_flags |= S_DEAD;
+ dont_mount(dentry);
+ d_delete(dentry);
+ mutex_unlock(&d_inode(parent)->i_mutex);
+
+ dput(dentry);
+
+ mutex_lock(&subsys->su_mutex);
+ unlink_group(group);
+ mutex_unlock(&subsys->su_mutex);
+}
+EXPORT_SYMBOL(configfs_unregister_group);
+
+/**
+ * configfs_register_default_group() - allocates and registers a child group
+ * @parent_group: parent group
+ * @name: child group name
+ * @item_type: child item type description
+ *
+ * Boilerplate to allocate and register a child group with its parent. We
+ * need kzalloc'ed memory because the child's default_group is initially
+ * empty.
+ *
+ * Return: allocated config group or ERR_PTR() on error
+ */
+struct config_group *
+configfs_register_default_group(struct config_group *parent_group,
+ const char *name,
+ struct config_item_type *item_type)
+{
+ int ret;
+ struct config_group *group;
+
+ group = kzalloc(sizeof(*group), GFP_KERNEL);
+ if (!group)
+ return ERR_PTR(-ENOMEM);
+ config_group_init_type_name(group, name, item_type);
+
+ ret = configfs_register_group(parent_group, group);
+ if (ret) {
+ kfree(group);
+ return ERR_PTR(ret);
+ }
+ return group;
+}
+EXPORT_SYMBOL(configfs_register_default_group);
+
+/**
+ * configfs_unregister_default_group() - unregisters and frees a child group
+ * @group: the group to act on
+ */
+void configfs_unregister_default_group(struct config_group *group)
+{
+ configfs_unregister_group(group);
+ kfree(group);
+}
+EXPORT_SYMBOL(configfs_unregister_default_group);
+
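
A typical caller pairs the two *_default_group() helpers like this (a minimal sketch; the item type and group name are hypothetical):

/* Hypothetical consumer of the new helpers. */
static struct config_item_type sketch_child_type = {
	.ct_owner = THIS_MODULE,
};

static struct config_group *sketch_child;

static int sketch_attach(struct config_group *parent)
{
	sketch_child = configfs_register_default_group(parent, "features",
						       &sketch_child_type);
	return PTR_ERR_OR_ZERO(sketch_child);
}

static void sketch_detach(void)
{
	configfs_unregister_default_group(sketch_child);
}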
int configfs_register_subsystem(struct configfs_subsystem *subsys)
{
int err;
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index ec5c8325b503..db6d69289608 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -279,27 +279,33 @@ static int configfs_getlink(struct dentry *dentry, char * path)
}
-static const char *configfs_follow_link(struct dentry *dentry, void **cookie)
+static const char *configfs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- unsigned long page = get_zeroed_page(GFP_KERNEL);
+ char *body;
int error;
- if (!page)
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
+ body = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!body)
return ERR_PTR(-ENOMEM);
- error = configfs_getlink(dentry, (char *)page);
+ error = configfs_getlink(dentry, body);
if (!error) {
- return *cookie = (void *)page;
+ set_delayed_call(done, kfree_link, body);
+ return body;
}
- free_page(page);
+ kfree(body);
return ERR_PTR(error);
}
const struct inode_operations configfs_symlink_inode_operations = {
- .follow_link = configfs_follow_link,
+ .get_link = configfs_get_link,
.readlink = generic_readlink,
- .put_link = free_page_put_link,
.setattr = configfs_setattr,
};
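
All of the ->get_link() conversions in this series share one shape: bail out with -ECHILD when called during RCU walk (dentry == NULL), build the link body, and hand ownership to the VFS through set_delayed_call() instead of the old ->put_link(). A condensed sketch, where foofs_read_link() is a hypothetical helper that fills the buffer with the target path:

/* Condensed shape of the new ->get_link(); foofs_* names are
 * hypothetical. */
static int foofs_read_link(struct inode *inode, char *buf, size_t len);

static const char *foofs_get_link(struct dentry *dentry,
				  struct inode *inode,
				  struct delayed_call *done)
{
	char *body;
	int err;

	if (!dentry)		/* RCU walk: cannot sleep here */
		return ERR_PTR(-ECHILD);

	body = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!body)
		return ERR_PTR(-ENOMEM);
	err = foofs_read_link(inode, body, PAGE_SIZE - 1);
	if (err) {
		kfree(body);
		return ERR_PTR(err);
	}
	set_delayed_call(done, kfree_link, body);	/* VFS frees it later */
	return body;
}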
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 355c522f3585..b862bc219cd7 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -100,6 +100,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
break;
case S_IFLNK:
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &cramfs_aops;
break;
default:
diff --git a/fs/dax.c b/fs/dax.c
index d1e5cb7311a1..43671b68220e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -541,6 +541,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
unsigned long pfn;
int result = 0;
+ /* dax pmd mappings are broken wrt gup and fork */
+ if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
+ return VM_FAULT_FALLBACK;
+
/* Fall back to PTEs if we're going to COW */
if (write && !(vma->vm_flags & VM_SHARED))
return VM_FAULT_FALLBACK;
diff --git a/fs/dcache.c b/fs/dcache.c
index 5c33aeb0f68f..d27f0909d9f6 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1734,7 +1734,7 @@ static unsigned d_flags_for_inode(struct inode *inode)
}
if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
- if (unlikely(inode->i_op->follow_link)) {
+ if (unlikely(inode->i_op->get_link)) {
add_flags = DCACHE_SYMLINK_TYPE;
goto type_determined;
}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index cb5337d8c273..602e8441bc0f 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1169,6 +1169,16 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
}
}
+ /* Once we sampled i_size check for reads beyond EOF */
+ dio->i_size = i_size_read(inode);
+ if (iov_iter_rw(iter) == READ && offset >= dio->i_size) {
+ if (dio->flags & DIO_LOCKING)
+ mutex_unlock(&inode->i_mutex);
+ kmem_cache_free(dio_cache, dio);
+ retval = 0;
+ goto out;
+ }
+
/*
* For file extending writes updating i_size before data writeouts
* complete can expose uninitialized blocks in dumb filesystems.
@@ -1222,7 +1232,6 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
sdio.next_block_for_io = -1;
dio->iocb = iocb;
- dio->i_size = i_size_read(inode);
spin_lock_init(&dio->bio_lock);
dio->refcount = 1;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 87e9d796cf7d..3a37bd3f9637 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -421,7 +421,7 @@ static void lowcomms_write_space(struct sock *sk)
if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) {
con->sock->sk->sk_write_pending--;
- clear_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags);
+ clear_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags);
}
if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
@@ -1448,7 +1448,7 @@ static void send_to_sock(struct connection *con)
msg_flags);
if (ret == -EAGAIN || ret == 0) {
if (ret == -EAGAIN &&
- test_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags) &&
+ test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) &&
!test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
/* Notify TCP that we're limited by the
* application window size.
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e2e47ba5d313..a4dddc61594c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -674,16 +674,24 @@ out:
return rc ? ERR_PTR(rc) : buf;
}
-static const char *ecryptfs_follow_link(struct dentry *dentry, void **cookie)
+static const char *ecryptfs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
size_t len;
- char *buf = ecryptfs_readlink_lower(dentry, &len);
+ char *buf;
+
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
+ buf = ecryptfs_readlink_lower(dentry, &len);
if (IS_ERR(buf))
return buf;
fsstack_copy_attr_atime(d_inode(dentry),
d_inode(ecryptfs_dentry_to_lower(dentry)));
buf[len] = '\0';
- return *cookie = buf;
+ set_delayed_call(done, kfree_link, buf);
+ return buf;
}
/**
@@ -1095,8 +1103,7 @@ out:
const struct inode_operations ecryptfs_symlink_iops = {
.readlink = generic_readlink,
- .follow_link = ecryptfs_follow_link,
- .put_link = kfree_put_link,
+ .get_link = ecryptfs_get_link,
.permission = ecryptfs_permission,
.setattr = ecryptfs_setattr,
.getattr = ecryptfs_getattr_link,
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 079d20306ee1..cdf0872382af 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -151,6 +151,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
break;
case S_IFLNK:
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &efs_symlink_aops;
break;
case S_IFCHR:
diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c
index 75117d0dac2b..4870cc82deb0 100644
--- a/fs/efs/symlink.c
+++ b/fs/efs/symlink.c
@@ -13,7 +13,7 @@
static int efs_symlink_readpage(struct file *file, struct page *page)
{
- char *link = kmap(page);
+ char *link = page_address(page);
struct buffer_head * bh;
struct inode * inode = page->mapping->host;
efs_block_t size = inode->i_size;
@@ -39,12 +39,10 @@ static int efs_symlink_readpage(struct file *file, struct page *page)
}
link[size] = '\0';
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
fail:
SetPageError(page);
- kunmap(page);
unlock_page(page);
return err;
}
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 73c64daa0f55..9eaf595aeaf8 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -592,10 +592,7 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
}
unlock_page(page);
}
- if (PageDirty(page) || PageWriteback(page))
- *uptodate = true;
- else
- *uptodate = PageUptodate(page);
+ *uptodate = PageUptodate(page);
EXOFS_DBGMSG2("index=0x%lx uptodate=%d\n", index, *uptodate);
return page;
} else {
@@ -1227,6 +1224,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
inode->i_link = (char *)oi->i_data;
} else {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &exofs_aops;
}
} else {
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 994e078da4bb..c20d77df2679 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -111,6 +111,7 @@ static int exofs_symlink(struct inode *dir, struct dentry *dentry,
if (l > sizeof(oi->i_data)) {
/* slow symlink */
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &exofs_aops;
memset(oi->i_data, 0, sizeof(oi->i_data));
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 0aa9bf6e6e53..338eefda70c6 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1420,6 +1420,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
sizeof(ei->i_data) - 1);
} else {
inode->i_op = &ext2_symlink_inode_operations;
+ inode_nohighmem(inode);
if (test_opt(inode->i_sb, NOBH))
inode->i_mapping->a_ops = &ext2_nobh_aops;
else
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 3267a80dbbe2..7a2be8f7f3c3 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -183,6 +183,7 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry,
if (l > sizeof (EXT2_I(inode)->i_data)) {
/* slow symlink */
inode->i_op = &ext2_symlink_inode_operations;
+ inode_nohighmem(inode);
if (test_opt(inode->i_sb, NOBH))
inode->i_mapping->a_ops = &ext2_nobh_aops;
else
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 3a71cea68420..748d35afc902 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -569,6 +569,8 @@ static int parse_options(char *options, struct super_block *sb)
/* Fall through */
case Opt_dax:
#ifdef CONFIG_FS_DAX
+ ext2_msg(sb, KERN_WARNING,
+ "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
set_opt(sbi->s_mount_opt, DAX);
#else
ext2_msg(sb, KERN_INFO, "dax option not supported");
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c
index ae17179f3810..3495d8ae4b33 100644
--- a/fs/ext2/symlink.c
+++ b/fs/ext2/symlink.c
@@ -22,8 +22,7 @@
const struct inode_operations ext2_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = ext2_setattr,
#ifdef CONFIG_EXT2_FS_XATTR
.setxattr = generic_setxattr,
@@ -35,7 +34,7 @@ const struct inode_operations ext2_symlink_inode_operations = {
const struct inode_operations ext2_fast_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = ext2_setattr,
#ifdef CONFIG_EXT2_FS_XATTR
.setxattr = generic_setxattr,
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index fa70848afa8f..cd95d14f9cc2 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -292,16 +292,21 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
const struct xattr_handler *handler =
ext2_xattr_handler(entry->e_name_index);
- if (handler) {
- size_t size = handler->list(handler, dentry, buffer,
- rest, entry->e_name,
- entry->e_name_len);
+ if (handler && (!handler->list || handler->list(dentry))) {
+ const char *prefix = handler->prefix ?: handler->name;
+ size_t prefix_len = strlen(prefix);
+ size_t size = prefix_len + entry->e_name_len + 1;
+
if (buffer) {
if (size > rest) {
error = -ERANGE;
goto cleanup;
}
- buffer += size;
+ memcpy(buffer, prefix, prefix_len);
+ buffer += prefix_len;
+ memcpy(buffer, entry->e_name, entry->e_name_len);
+ buffer += entry->e_name_len;
+ *buffer++ = 0;
}
rest -= size;
}
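
The loop above emits the standard listxattr(2) format: each visible attribute becomes handler prefix + on-disk name + NUL, back to back. A userspace sketch of walking such a buffer (the contents are illustrative):

/* Walk a NUL-separated xattr name list as produced by the loop above. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char list[] = "user.comment\0security.selinux\0";
	size_t len = sizeof(list) - 1;	/* as listxattr(2) would return */
	size_t off;

	for (off = 0; off < len; off += strlen(list + off) + 1)
		printf("xattr: %s\n", list + off);
	return 0;
}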
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index dfb08750370d..ba97f243b050 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -7,29 +7,11 @@
#include <linux/security.h>
#include "xattr.h"
-static size_t
-ext2_xattr_security_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
-{
- const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
static int
ext2_xattr_security_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_SECURITY, name,
buffer, size);
}
@@ -39,8 +21,6 @@ ext2_xattr_security_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext2_xattr_set(d_inode(dentry), EXT2_XATTR_INDEX_SECURITY, name,
value, size, flags);
}
@@ -71,7 +51,6 @@ ext2_init_security(struct inode *inode, struct inode *dir,
const struct xattr_handler ext2_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = ext2_xattr_security_list,
.get = ext2_xattr_security_get,
.set = ext2_xattr_security_set,
};
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 3150dd3a7859..2c94d1930626 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -8,23 +8,10 @@
#include "ext2.h"
#include "xattr.h"
-static size_t
-ext2_xattr_trusted_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool
+ext2_xattr_trusted_list(struct dentry *dentry)
{
- const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
+ return capable(CAP_SYS_ADMIN);
}
static int
@@ -32,8 +19,6 @@ ext2_xattr_trusted_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_TRUSTED, name,
buffer, size);
}
@@ -43,8 +28,6 @@ ext2_xattr_trusted_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext2_xattr_set(d_inode(dentry), EXT2_XATTR_INDEX_TRUSTED, name,
value, size, flags);
}
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 339a49bbb8ef..72a2a96d677f 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -10,23 +10,10 @@
#include "ext2.h"
#include "xattr.h"
-static size_t
-ext2_xattr_user_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool
+ext2_xattr_user_list(struct dentry *dentry)
{
- const size_t prefix_len = XATTR_USER_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!test_opt(dentry->d_sb, XATTR_USER))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_USER_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
+ return test_opt(dentry->d_sb, XATTR_USER);
}
static int
@@ -34,8 +21,6 @@ ext2_xattr_user_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_USER,
@@ -47,8 +32,6 @@ ext2_xattr_user_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index af06830bfc00..1a0835073663 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -389,7 +389,7 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
struct ext4_crypto_ctx *ctx;
struct page *ciphertext_page = NULL;
struct bio *bio;
- ext4_lblk_t lblk = ex->ee_block;
+ ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
ext4_fsblk_t pblk = ext4_ext_pblock(ex);
unsigned int len = ext4_ext_get_actual_len(ex);
int ret, err = 0;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 750063f7a50c..cc7ca4e87144 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -26,6 +26,7 @@
#include <linux/seqlock.h>
#include <linux/mutex.h>
#include <linux/timer.h>
+#include <linux/version.h>
#include <linux/wait.h>
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
@@ -727,19 +728,55 @@ struct move_extent {
<= (EXT4_GOOD_OLD_INODE_SIZE + \
(einode)->i_extra_isize)) \
+/*
+ * We use an encoding that preserves the times for extra epoch "00":
+ *
+ * extra msb of adjust for signed
+ * epoch 32-bit 32-bit tv_sec to
+ * bits time decoded 64-bit tv_sec 64-bit tv_sec valid time range
+ * 0 0 1 -0x80000000..-0x00000001 0x000000000 1901-12-13..1969-12-31
+ * 0 0 0 0x000000000..0x07fffffff 0x000000000 1970-01-01..2038-01-19
+ * 0 1 1 0x080000000..0x0ffffffff 0x100000000 2038-01-19..2106-02-07
+ * 0 1 0 0x100000000..0x17fffffff 0x100000000 2106-02-07..2174-02-25
+ * 1 0 1 0x180000000..0x1ffffffff 0x200000000 2174-02-25..2242-03-16
+ * 1 0 0 0x200000000..0x27fffffff 0x200000000 2242-03-16..2310-04-04
+ * 1 1 1 0x280000000..0x2ffffffff 0x300000000 2310-04-04..2378-04-22
+ * 1 1 0 0x300000000..0x37fffffff 0x300000000 2378-04-22..2446-05-10
+ *
+ * Note that previous versions of the kernel on 64-bit systems would
+ * incorrectly use extra epoch bits 1,1 for dates between 1901 and
+ * 1970. e2fsck will correct this, assuming that it is run on the
+ * affected filesystem before 2242.
+ */
+
static inline __le32 ext4_encode_extra_time(struct timespec *time)
{
- return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
- (time->tv_sec >> 32) & EXT4_EPOCH_MASK : 0) |
- ((time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK));
+ u32 extra = sizeof(time->tv_sec) > 4 ?
+ ((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK : 0;
+ return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS));
}
static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
{
- if (sizeof(time->tv_sec) > 4)
- time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
- << 32;
- time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
+ if (unlikely(sizeof(time->tv_sec) > 4 &&
+ (extra & cpu_to_le32(EXT4_EPOCH_MASK)))) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,20,0)
+ /* Handle legacy encoding of pre-1970 dates with epoch
+ * bits 1,1. We assume that by kernel version 4.20,
+ * everyone will have run fsck over the affected
+ * filesystems to correct the problem. (This
+ * backwards compatibility may be removed before this
+ * time, at the discretion of the ext4 developers.)
+ */
+ u64 extra_bits = le32_to_cpu(extra) & EXT4_EPOCH_MASK;
+ if (extra_bits == 3 && ((time->tv_sec) & 0x80000000) != 0)
+ extra_bits = 0;
+ time->tv_sec += extra_bits << 32;
+#else
+ time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32;
+#endif
+ }
+ time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
}
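
For a concrete feel of the arithmetic, here is a userspace round trip for one post-2106 date, a sketch that mirrors the helpers above with plain integers (epoch_mask stands in for EXT4_EPOCH_MASK):

/* tv_sec = 0x123456789 (year 2124): low word 0x23456789, epoch bits 1.
 * Decode sign-extends the low word and adds the epoch bits back. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint32_t epoch_mask = 0x3;
	int64_t tv_sec = 0x123456789LL;
	uint32_t low = (uint32_t)tv_sec;
	uint32_t extra = (uint32_t)(((tv_sec - (int32_t)low) >> 32) & epoch_mask);
	int64_t decoded = (int64_t)(int32_t)low + ((int64_t)extra << 32);

	printf("extra=%u decoded=0x%llx\n", extra, (unsigned long long)decoded);
	return 0;	/* prints extra=1 decoded=0x123456789 */
}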
#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ea433a7f4bca..b3bd912df6bf 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4283,6 +4283,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
inode->i_op = &ext4_symlink_inode_operations;
ext4_set_aops(inode);
}
+ inode_nohighmem(inode);
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
inode->i_op = &ext4_special_inode_operations;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a969ab39f302..f27e0c2598c5 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3132,6 +3132,7 @@ static int ext4_symlink(struct inode *dir,
if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
if (!encryption_required)
inode->i_op = &ext4_symlink_inode_operations;
+ inode_nohighmem(inode);
ext4_set_aops(inode);
/*
* We cannot call page_symlink() with transaction started
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 753f4e68b820..c9ab67da6e5a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1664,8 +1664,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
}
sbi->s_jquota_fmt = m->mount_opt;
#endif
-#ifndef CONFIG_FS_DAX
} else if (token == Opt_dax) {
+#ifdef CONFIG_FS_DAX
+ ext4_msg(sb, KERN_WARNING,
+ "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+ sbi->s_mount_opt |= m->mount_opt;
+#else
ext4_msg(sb, KERN_INFO, "dax option not supported");
return -1;
#endif
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index abe2401ce405..6f7ee30a89ce 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -23,17 +23,21 @@
#include "xattr.h"
#ifdef CONFIG_EXT4_FS_ENCRYPTION
-static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cookie)
+static const char *ext4_encrypted_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
struct page *cpage = NULL;
char *caddr, *paddr = NULL;
struct ext4_str cstr, pstr;
- struct inode *inode = d_inode(dentry);
struct ext4_encrypted_symlink_data *sd;
loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
int res;
u32 plen, max_size = inode->i_sb->s_blocksize;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
res = ext4_get_encryption_info(inode);
if (res)
return ERR_PTR(res);
@@ -45,14 +49,14 @@ static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cook
cpage = read_mapping_page(inode->i_mapping, 0, NULL);
if (IS_ERR(cpage))
return ERR_CAST(cpage);
- caddr = kmap(cpage);
+ caddr = page_address(cpage);
caddr[size] = 0;
}
/* Symlink is encrypted */
sd = (struct ext4_encrypted_symlink_data *)caddr;
cstr.name = sd->encrypted_path;
- cstr.len = le32_to_cpu(sd->len);
+ cstr.len = le16_to_cpu(sd->len);
if ((cstr.len +
sizeof(struct ext4_encrypted_symlink_data) - 1) >
max_size) {
@@ -75,24 +79,20 @@ static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cook
/* Null-terminate the name */
if (res <= plen)
paddr[res] = '\0';
- if (cpage) {
- kunmap(cpage);
+ if (cpage)
page_cache_release(cpage);
- }
- return *cookie = paddr;
+ set_delayed_call(done, kfree_link, paddr);
+ return paddr;
errout:
- if (cpage) {
- kunmap(cpage);
+ if (cpage)
page_cache_release(cpage);
- }
kfree(paddr);
return ERR_PTR(res);
}
const struct inode_operations ext4_encrypted_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = ext4_encrypted_follow_link,
- .put_link = kfree_put_link,
+ .get_link = ext4_encrypted_get_link,
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
@@ -103,8 +103,7 @@ const struct inode_operations ext4_encrypted_symlink_inode_operations = {
const struct inode_operations ext4_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
@@ -114,7 +113,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
const struct inode_operations ext4_fast_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 1b57c72f4a00..1420a3c614af 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -358,7 +358,7 @@ static int name##_open(struct inode *inode, struct file *file) \
return single_open(file, ext4_seq_##name##_show, PDE_DATA(inode)); \
} \
\
-const struct file_operations ext4_seq_##name##_fops = { \
+static const struct file_operations ext4_seq_##name##_fops = { \
.owner = THIS_MODULE, \
.open = name##_open, \
.read = seq_read, \
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 6b6b3e751f8c..e9b9afdd1d96 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -404,19 +404,24 @@ ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
const struct xattr_handler *handler =
ext4_xattr_handler(entry->e_name_index);
- if (handler) {
- size_t size = handler->list(handler, dentry, buffer,
- rest, entry->e_name,
- entry->e_name_len);
+ if (handler && (!handler->list || handler->list(dentry))) {
+ const char *prefix = handler->prefix ?: handler->name;
+ size_t prefix_len = strlen(prefix);
+ size_t size = prefix_len + entry->e_name_len + 1;
+
if (buffer) {
if (size > rest)
return -ERANGE;
- buffer += size;
+ memcpy(buffer, prefix, prefix_len);
+ buffer += prefix_len;
+ memcpy(buffer, entry->e_name, entry->e_name_len);
+ buffer += entry->e_name_len;
+ *buffer++ = 0;
}
rest -= size;
}
}
- return buffer_size - rest;
+ return buffer_size - rest; /* total size */
}
static int
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index 36f4c1a84c21..3e81bdca071a 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -11,30 +11,11 @@
#include "ext4.h"
#include "xattr.h"
-static size_t
-ext4_xattr_security_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
-{
- const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
- const size_t total_len = prefix_len + name_len + 1;
-
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
static int
ext4_xattr_security_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_SECURITY,
name, buffer, size);
}
@@ -44,8 +25,6 @@ ext4_xattr_security_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_SECURITY,
name, value, size, flags);
}
@@ -79,7 +58,6 @@ ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
const struct xattr_handler ext4_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = ext4_xattr_security_list,
.get = ext4_xattr_security_get,
.set = ext4_xattr_security_set,
};
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index 488089053342..2a3c6f9b8cb8 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -12,23 +12,10 @@
#include "ext4.h"
#include "xattr.h"
-static size_t
-ext4_xattr_trusted_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool
+ext4_xattr_trusted_list(struct dentry *dentry)
{
- const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
+ return capable(CAP_SYS_ADMIN);
}
static int
@@ -36,8 +23,6 @@ ext4_xattr_trusted_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name, void *buffer,
size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_TRUSTED,
name, buffer, size);
}
@@ -47,8 +32,6 @@ ext4_xattr_trusted_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_TRUSTED,
name, value, size, flags);
}
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index d2dec3364062..d152f431e432 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -11,23 +11,10 @@
#include "ext4.h"
#include "xattr.h"
-static size_t
-ext4_xattr_user_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool
+ext4_xattr_user_list(struct dentry *dentry)
{
- const size_t prefix_len = XATTR_USER_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!test_opt(dentry->d_sb, XATTR_USER))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_USER_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
+ return test_opt(dentry->d_sb, XATTR_USER);
}
static int
@@ -35,8 +22,6 @@ ext4_xattr_user_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_USER,
@@ -48,8 +33,6 @@ ext4_xattr_user_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_USER,
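The deleted strcmp(name, "") checks in the get/set paths rely on the xattr dispatch code stripping handler->prefix and refusing a bare prefix before a handler is ever called. A hedged sketch of that rule; strip_prefix() is illustrative, not a real kernel helper:

/*
 * Illustrative only -- strip_prefix() is not a kernel helper.  The
 * dispatch rule assumed by the deletions above: strip the handler's
 * prefix and treat an empty remainder ("user." alone) as no match,
 * so ->get/->set never see name == "".
 */
static const char *strip_prefix(const char *name, const char *prefix)
{
	size_t n = strlen(prefix);

	if (strncmp(name, prefix, n) != 0 || name[n] == '\0')
		return NULL;
	return name + n;
}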
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 97e20decacb4..5528801a5baf 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -202,6 +202,7 @@ make_now:
inode->i_op = &f2fs_encrypted_symlink_inode_operations;
else
inode->i_op = &f2fs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 2c32110f9fc0..e7587fce1b80 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -315,12 +315,15 @@ fail:
return err;
}
-static const char *f2fs_follow_link(struct dentry *dentry, void **cookie)
+static const char *f2fs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- const char *link = page_follow_link_light(dentry, cookie);
+ const char *link = page_get_link(dentry, inode, done);
if (!IS_ERR(link) && !*link) {
/* this is broken symlink case */
- page_put_link(NULL, *cookie);
+ do_delayed_call(done);
+ clear_delayed_call(done);
link = ERR_PTR(-ENOENT);
}
return link;
@@ -351,6 +354,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
inode->i_op = &f2fs_encrypted_symlink_inode_operations;
else
inode->i_op = &f2fs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
f2fs_lock_op(sbi);
@@ -923,18 +927,22 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
}
#ifdef CONFIG_F2FS_FS_ENCRYPTION
-static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cookie)
+static const char *f2fs_encrypted_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
struct page *cpage = NULL;
char *caddr, *paddr = NULL;
struct f2fs_str cstr;
struct f2fs_str pstr = FSTR_INIT(NULL, 0);
- struct inode *inode = d_inode(dentry);
struct f2fs_encrypted_symlink_data *sd;
loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
u32 max_size = inode->i_sb->s_blocksize;
int res;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
res = f2fs_get_encryption_info(inode);
if (res)
return ERR_PTR(res);
@@ -942,7 +950,7 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook
cpage = read_mapping_page(inode->i_mapping, 0, NULL);
if (IS_ERR(cpage))
return ERR_CAST(cpage);
- caddr = kmap(cpage);
+ caddr = page_address(cpage);
caddr[size] = 0;
/* Symlink is encrypted */
@@ -982,21 +990,19 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook
/* Null-terminate the name */
paddr[res] = '\0';
- kunmap(cpage);
page_cache_release(cpage);
- return *cookie = paddr;
+ set_delayed_call(done, kfree_link, paddr);
+ return paddr;
errout:
kfree(cstr.name);
f2fs_fname_crypto_free_buffer(&pstr);
- kunmap(cpage);
page_cache_release(cpage);
return ERR_PTR(res);
}
const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = f2fs_encrypted_follow_link,
- .put_link = kfree_put_link,
+ .get_link = f2fs_encrypted_get_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
.setxattr = generic_setxattr,
@@ -1031,8 +1037,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
const struct inode_operations f2fs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = f2fs_follow_link,
- .put_link = page_put_link,
+ .get_link = f2fs_get_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
#ifdef CONFIG_F2FS_FS_XATTR
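The f2fs conversion above shows the full ->get_link calling convention: a NULL dentry signals an RCU walk, and the body's lifetime is handed to the VFS through a delayed_call instead of a separate ->put_link. A minimal sketch under those assumptions; example_read_target() is a hypothetical stand-in for the filesystem's own fetch:

/* Hypothetical fetch of the link body; not a real kernel function. */
extern int example_read_target(struct inode *inode, char *buf, size_t len);

static const char *example_get_link(struct dentry *dentry,
				    struct inode *inode,
				    struct delayed_call *done)
{
	char *body;

	if (!dentry)
		return ERR_PTR(-ECHILD);	/* RCU walk: must not block */

	body = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!body)
		return ERR_PTR(-ENOMEM);
	if (example_read_target(inode, body, PAGE_SIZE) < 0) {
		kfree(body);
		return ERR_PTR(-EIO);
	}
	set_delayed_call(done, kfree_link, body);	/* VFS frees it later */
	return body;
}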
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 862368a32e53..036952a945fa 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -25,38 +25,6 @@
#include "f2fs.h"
#include "xattr.h"
-static size_t f2fs_xattr_generic_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t len)
-{
- struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- int total_len, prefix_len;
-
- switch (handler->flags) {
- case F2FS_XATTR_INDEX_USER:
- if (!test_opt(sbi, XATTR_USER))
- return -EOPNOTSUPP;
- break;
- case F2FS_XATTR_INDEX_TRUSTED:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
- case F2FS_XATTR_INDEX_SECURITY:
- break;
- default:
- return -EINVAL;
- }
-
- prefix_len = strlen(handler->prefix);
- total_len = prefix_len + len + 1;
- if (list && total_len <= list_size) {
- memcpy(list, handler->prefix, prefix_len);
- memcpy(list + prefix_len, name, len);
- list[prefix_len + len] = '\0';
- }
- return total_len;
-}
-
static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name, void *buffer,
size_t size)
@@ -77,8 +45,6 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
default:
return -EINVAL;
}
- if (strcmp(name, "") == 0)
- return -EINVAL;
return f2fs_getxattr(d_inode(dentry), handler->flags, name,
buffer, size, NULL);
}
@@ -103,24 +69,20 @@ static int f2fs_xattr_generic_set(const struct xattr_handler *handler,
default:
return -EINVAL;
}
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
return f2fs_setxattr(d_inode(dentry), handler->flags, name,
value, size, NULL, flags);
}
-static size_t f2fs_xattr_advise_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t len)
+static bool f2fs_xattr_user_list(struct dentry *dentry)
{
- const char *xname = F2FS_SYSTEM_ADVISE_PREFIX;
- size_t size;
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- size = strlen(xname) + 1;
- if (list && size <= list_size)
- memcpy(list, xname, size);
- return size;
+ return test_opt(sbi, XATTR_USER);
+}
+
+static bool f2fs_xattr_trusted_list(struct dentry *dentry)
+{
+ return capable(CAP_SYS_ADMIN);
}
static int f2fs_xattr_advise_get(const struct xattr_handler *handler,
@@ -129,9 +91,6 @@ static int f2fs_xattr_advise_get(const struct xattr_handler *handler,
{
struct inode *inode = d_inode(dentry);
- if (strcmp(name, "") != 0)
- return -EINVAL;
-
if (buffer)
*((char *)buffer) = F2FS_I(inode)->i_advise;
return sizeof(char);
@@ -143,8 +102,6 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
{
struct inode *inode = d_inode(dentry);
- if (strcmp(name, "") != 0)
- return -EINVAL;
if (!inode_owner_or_capable(inode))
return -EPERM;
if (value == NULL)
@@ -183,7 +140,7 @@ int f2fs_init_security(struct inode *inode, struct inode *dir,
const struct xattr_handler f2fs_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
.flags = F2FS_XATTR_INDEX_USER,
- .list = f2fs_xattr_generic_list,
+ .list = f2fs_xattr_user_list,
.get = f2fs_xattr_generic_get,
.set = f2fs_xattr_generic_set,
};
@@ -191,15 +148,14 @@ const struct xattr_handler f2fs_xattr_user_handler = {
const struct xattr_handler f2fs_xattr_trusted_handler = {
.prefix = XATTR_TRUSTED_PREFIX,
.flags = F2FS_XATTR_INDEX_TRUSTED,
- .list = f2fs_xattr_generic_list,
+ .list = f2fs_xattr_trusted_list,
.get = f2fs_xattr_generic_get,
.set = f2fs_xattr_generic_set,
};
const struct xattr_handler f2fs_xattr_advise_handler = {
- .prefix = F2FS_SYSTEM_ADVISE_PREFIX,
+ .name = F2FS_SYSTEM_ADVISE_NAME,
.flags = F2FS_XATTR_INDEX_ADVISE,
- .list = f2fs_xattr_advise_list,
.get = f2fs_xattr_advise_get,
.set = f2fs_xattr_advise_set,
};
@@ -207,7 +163,6 @@ const struct xattr_handler f2fs_xattr_advise_handler = {
const struct xattr_handler f2fs_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.flags = F2FS_XATTR_INDEX_SECURITY,
- .list = f2fs_xattr_generic_list,
.get = f2fs_xattr_generic_get,
.set = f2fs_xattr_generic_set,
};
@@ -455,20 +410,27 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
list_for_each_xattr(entry, base_addr) {
const struct xattr_handler *handler =
f2fs_xattr_handler(entry->e_name_index);
+ const char *prefix;
+ size_t prefix_len;
size_t size;
- if (!handler)
+ if (!handler || (handler->list && !handler->list(dentry)))
continue;
- size = handler->list(handler, dentry, buffer, rest,
- entry->e_name, entry->e_name_len);
- if (buffer && size > rest) {
- error = -ERANGE;
- goto cleanup;
+ prefix = handler->prefix ?: handler->name;
+ prefix_len = strlen(prefix);
+ size = prefix_len + entry->e_name_len + 1;
+ if (buffer) {
+ if (size > rest) {
+ error = -ERANGE;
+ goto cleanup;
+ }
+ memcpy(buffer, prefix, prefix_len);
+ buffer += prefix_len;
+ memcpy(buffer, entry->e_name, entry->e_name_len);
+ buffer += entry->e_name_len;
+ *buffer++ = 0;
}
-
- if (buffer)
- buffer += size;
rest -= size;
}
error = buffer_size - rest;
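With handlers out of the copying business, the buffer f2fs_listxattr() fills keeps its ABI: a run of NUL-terminated "prefix.name" strings. A small userspace sketch of walking that buffer via listxattr(2):

#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

/* Walk the concatenated, NUL-terminated names listxattr(2) returns. */
static void dump_xattrs(const char *path)
{
	char buf[4096];
	ssize_t len = listxattr(path, buf, sizeof(buf));

	for (ssize_t off = 0; off < len; off++) {
		printf("%s\n", buf + off);
		off += strlen(buf + off);	/* step to the next name */
	}
}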
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 71a7100d5492..79dccc8252dd 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -27,7 +27,7 @@
#define F2FS_XATTR_REFCOUNT_MAX 1024
/* Name indexes */
-#define F2FS_SYSTEM_ADVISE_PREFIX "system.advise"
+#define F2FS_SYSTEM_ADVISE_NAME "system.advise"
#define F2FS_XATTR_INDEX_USER 1
#define F2FS_XATTR_INDEX_POSIX_ACL_ACCESS 2
#define F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT 3
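The rename from _PREFIX to _NAME above tracks a semantic split in struct xattr_handler: .name matches exactly one attribute ("system.advise"), while .prefix matches a whole namespace, which is why the listxattr hunks use handler->prefix ?: handler->name. A sketch of the two shapes, with placeholder callbacks:

/* Placeholder callbacks; prototypes match the handlers in this patch. */
static int example_get(const struct xattr_handler *handler,
		       struct dentry *dentry, const char *name,
		       void *buffer, size_t size);
static int example_set(const struct xattr_handler *handler,
		       struct dentry *dentry, const char *name,
		       const void *value, size_t size, int flags);

static const struct xattr_handler example_single_handler = {
	.name	= "system.example",	/* exact-match attribute */
	.get	= example_get,
	.set	= example_set,
};

static const struct xattr_handler example_namespace_handler = {
	.prefix	= "user.",		/* matches user.anything */
	.get	= example_get,
	.set	= example_set,
};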
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 4afc4d9d2e41..8b2127ffb226 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -610,9 +610,9 @@ parse_record:
int status = fat_parse_long(inode, &cpos, &bh, &de,
&unicode, &nr_slots);
if (status < 0) {
- ctx->pos = cpos;
+ bh = NULL;
ret = status;
- goto out;
+ goto end_of_dir;
} else if (status == PARSE_INVALID)
goto record_end;
else if (status == PARSE_NOT_LONGNAME)
@@ -654,8 +654,9 @@ parse_record:
fill_len = short_len;
start_filldir:
- if (!fake_offset)
- ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
+ ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
+ if (fake_offset && ctx->pos < 2)
+ ctx->pos = 2;
if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) {
if (!dir_emit_dot(file, ctx))
@@ -681,14 +682,19 @@ record_end:
fake_offset = 0;
ctx->pos = cpos;
goto get_new;
+
end_of_dir:
- ctx->pos = cpos;
+ if (fake_offset && cpos < 2)
+ ctx->pos = 2;
+ else
+ ctx->pos = cpos;
fill_failed:
brelse(bh);
if (unicode)
__putname(unicode);
out:
mutex_unlock(&sbi->s_lock);
+
return ret;
}
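The fat/dir.c changes hinge on the f_pos layout for directories without real dot entries: slots 0 and 1 hold the synthetic "." and ".." (fake_offset), so a resumed or failed readdir must never rewind below 2. A sketch of the clamp the hunks apply; fat_clamp_pos() is illustrative only:

/*
 * Illustrative only: when fake_offset is set, positions 0 and 1 belong
 * to the synthetic dot entries, so real positions are clamped to >= 2.
 */
static loff_t fat_clamp_pos(int fake_offset, loff_t cpos)
{
	if (fake_offset && cpos < 2)
		return 2;
	return cpos;
}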
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index ef73ed674a27..3e2ccade61ed 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -326,6 +326,7 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
} else if (S_ISLNK(ip->i_mode)) {
if (!VXFS_ISIMMED(vip)) {
ip->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(ip);
ip->i_mapping->a_ops = &vxfs_aops;
} else {
ip->i_op = &simple_symlink_inode_operations;
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index eae2c11268bc..8e3ee1936c7e 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -549,6 +549,8 @@ static int cuse_channel_release(struct inode *inode, struct file *file)
unregister_chrdev_region(cc->cdev->dev, 1);
cdev_del(cc->cdev);
}
+ /* Base reference is now owned by "fud" */
+ fuse_conn_put(&cc->fc);
rc = fuse_dev_release(inode, file); /* puts the base reference */
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 5e2e08712d3b..712601f299b8 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1365,15 +1365,19 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx)
return err;
}
-static const char *fuse_follow_link(struct dentry *dentry, void **cookie)
+static const char *fuse_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
struct fuse_conn *fc = get_fuse_conn(inode);
FUSE_ARGS(args);
char *link;
ssize_t ret;
- link = (char *) __get_free_page(GFP_KERNEL);
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
+ link = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!link)
return ERR_PTR(-ENOMEM);
@@ -1385,11 +1389,11 @@ static const char *fuse_follow_link(struct dentry *dentry, void **cookie)
args.out.args[0].value = link;
ret = fuse_simple_request(fc, &args);
if (ret < 0) {
- free_page((unsigned long) link);
+ kfree(link);
link = ERR_PTR(ret);
} else {
link[ret] = '\0';
- *cookie = link;
+ set_delayed_call(done, kfree_link, link);
}
fuse_invalidate_atime(inode);
return link;
@@ -1909,8 +1913,7 @@ static const struct inode_operations fuse_common_inode_operations = {
static const struct inode_operations fuse_symlink_inode_operations = {
.setattr = fuse_setattr,
- .follow_link = fuse_follow_link,
- .put_link = free_page_put_link,
+ .get_link = fuse_get_link,
.readlink = generic_readlink,
.getattr = fuse_getattr,
.setxattr = fuse_setxattr,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e0faf8f2c868..570ca4053c80 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1049,6 +1049,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
flush_dcache_page(page);
+ iov_iter_advance(ii, tmp);
if (!tmp) {
unlock_page(page);
page_cache_release(page);
@@ -1061,7 +1062,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
req->page_descs[req->num_pages].length = tmp;
req->num_pages++;
- iov_iter_advance(ii, tmp);
count += tmp;
pos += tmp;
offset += tmp;
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 1be3b061c05c..791932617d1a 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -31,9 +31,9 @@ static const char *gfs2_acl_name(int type)
{
switch (type) {
case ACL_TYPE_ACCESS:
- return GFS2_POSIX_ACL_ACCESS;
+ return XATTR_POSIX_ACL_ACCESS;
case ACL_TYPE_DEFAULT:
- return GFS2_POSIX_ACL_DEFAULT;
+ return XATTR_POSIX_ACL_DEFAULT;
}
return NULL;
}
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index 2d65ec4cd4be..3af4f407a483 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -12,8 +12,6 @@
#include "incore.h"
-#define GFS2_POSIX_ACL_ACCESS "posix_acl_access"
-#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
#define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12)
extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 063fdfcf8275..1bae189f3245 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1712,24 +1712,30 @@ static int gfs2_rename2(struct inode *odir, struct dentry *odentry,
}
/**
- * gfs2_follow_link - Follow a symbolic link
+ * gfs2_get_link - Follow a symbolic link
* @dentry: The dentry of the link
- * @nd: Data that we pass to vfs_follow_link()
+ * @inode: The inode of the link
+ * @done: destructor for return value
*
* This can handle symlinks of any size.
*
* Returns: the symlink body on success, or an ERR_PTR-encoded error
*/
-static const char *gfs2_follow_link(struct dentry *dentry, void **cookie)
+static const char *gfs2_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct gfs2_inode *ip = GFS2_I(d_inode(dentry));
+ struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder i_gh;
struct buffer_head *dibh;
unsigned int size;
char *buf;
int error;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
error = gfs2_glock_nq(&i_gh);
if (error) {
@@ -1759,7 +1765,7 @@ static const char *gfs2_follow_link(struct dentry *dentry, void **cookie)
out:
gfs2_glock_dq_uninit(&i_gh);
if (!IS_ERR(buf))
- *cookie = buf;
+ set_delayed_call(done, kfree_link, buf);
return buf;
}
@@ -2132,8 +2138,7 @@ const struct inode_operations gfs2_dir_iops = {
const struct inode_operations gfs2_symlink_iops = {
.readlink = generic_readlink,
- .follow_link = gfs2_follow_link,
- .put_link = kfree_put_link,
+ .get_link = gfs2_get_link,
.permission = gfs2_permission,
.setattr = gfs2_setattr,
.getattr = gfs2_getattr,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 53ce76a374fe..84f2d81fe451 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1237,56 +1237,6 @@ static int gfs2_xattr_set(const struct xattr_handler *handler,
size, flags, handler->flags);
}
-
-static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
- struct gfs2_ea_header *ea, char *data)
-{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- unsigned int amount = GFS2_EA_DATA_LEN(ea);
- unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
- int ret;
-
- ret = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
- if (ret)
- return ret;
-
- ret = gfs2_iter_unstuffed(ip, ea, data, NULL);
- gfs2_trans_end(sdp);
-
- return ret;
-}
-
-int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
-{
- struct inode *inode = &ip->i_inode;
- struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct gfs2_ea_location el;
- int error;
-
- error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el);
- if (error)
- return error;
-
- if (GFS2_EA_IS_STUFFED(el.el_ea)) {
- error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
- if (error == 0) {
- gfs2_trans_add_meta(ip->i_gl, el.el_bh);
- memcpy(GFS2_EA2DATA(el.el_ea), data,
- GFS2_EA_DATA_LEN(el.el_ea));
- }
- } else {
- error = ea_acl_chmod_unstuffed(ip, el.el_ea, data);
- }
-
- brelse(el.el_bh);
- if (error)
- return error;
-
- error = gfs2_setattr_simple(inode, attr);
- gfs2_trans_end(sdp);
- return error;
-}
-
static int ea_dealloc_indirect(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index d392f8358f2f..2d887c88eb49 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -62,6 +62,5 @@ extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
/* Exported to acl.c */
extern int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data);
-extern int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data);
#endif /* __EATTR_DOT_H__ */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 6dd107d7421e..19b33f8151f1 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -403,6 +403,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
} else if (S_ISLNK(inode->i_mode)) {
sbi->file_count++;
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &hfsplus_aops;
hip->clump_blocks = 1;
} else
@@ -526,6 +527,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
inode->i_mapping->a_ops = &hfsplus_aops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &hfsplus_aops;
} else {
init_special_inode(inode, inode->i_mode,
diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c
index df0c9af68d05..afb33eda6d7d 100644
--- a/fs/hfsplus/posix_acl.c
+++ b/fs/hfsplus/posix_acl.c
@@ -21,10 +21,10 @@ struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type)
switch (type) {
case ACL_TYPE_ACCESS:
- xattr_name = POSIX_ACL_XATTR_ACCESS;
+ xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- xattr_name = POSIX_ACL_XATTR_DEFAULT;
+ xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
return ERR_PTR(-EINVAL);
@@ -66,7 +66,7 @@ int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
switch (type) {
case ACL_TYPE_ACCESS:
- xattr_name = POSIX_ACL_XATTR_ACCESS;
+ xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
err = posix_acl_equiv_mode(acl, &inode->i_mode);
if (err < 0)
@@ -76,7 +76,7 @@ int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
break;
case ACL_TYPE_DEFAULT:
- xattr_name = POSIX_ACL_XATTR_DEFAULT;
+ xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
if (!S_ISDIR(inode->i_mode))
return acl ? -EACCES : 0;
break;
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index e41a010cd89c..ab01530b4930 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -431,9 +431,6 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name,
char *xattr_name;
int res;
- if (!strcmp(name, ""))
- return -EINVAL;
-
xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
GFP_KERNEL);
if (!xattr_name)
@@ -589,9 +586,6 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
int res;
char *xattr_name;
- if (!strcmp(name, ""))
- return -EINVAL;
-
xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
GFP_KERNEL);
if (!xattr_name)
@@ -853,9 +847,6 @@ static int hfsplus_osx_getxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (!strcmp(name, ""))
- return -EINVAL;
-
/*
* Don't allow retrieving properly prefixed attributes
* by prepending them with "osx."
@@ -876,9 +867,6 @@ static int hfsplus_osx_setxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags)
{
- if (!strcmp(name, ""))
- return -EINVAL;
-
/*
* Don't allow setting properly prefixed attributes
* by prepending them with "osx."
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2ac99db3750e..7db524cc85b6 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -892,9 +892,14 @@ static const struct inode_operations hostfs_dir_iops = {
.setattr = hostfs_setattr,
};
-static const char *hostfs_follow_link(struct dentry *dentry, void **cookie)
+static const char *hostfs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- char *link = __getname();
+ char *link;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+ link = kmalloc(PATH_MAX, GFP_KERNEL);
if (link) {
char *path = dentry_name(dentry);
int err = -ENOMEM;
@@ -905,25 +910,20 @@ static const char *hostfs_follow_link(struct dentry *dentry, void **cookie)
__putname(path);
}
if (err < 0) {
- __putname(link);
+ kfree(link);
return ERR_PTR(err);
}
} else {
return ERR_PTR(-ENOMEM);
}
- return *cookie = link;
-}
-
-static void hostfs_put_link(struct inode *unused, void *cookie)
-{
- __putname(cookie);
+ set_delayed_call(done, kfree_link, link);
+ return link;
}
static const struct inode_operations hostfs_link_iops = {
.readlink = generic_readlink,
- .follow_link = hostfs_follow_link,
- .put_link = hostfs_put_link,
+ .get_link = hostfs_get_link,
};
static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 933c73780813..1f3c6d76200b 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -77,6 +77,7 @@ void hpfs_read_inode(struct inode *i)
kfree(ea);
i->i_mode = S_IFLNK | 0777;
i->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(i);
i->i_data.a_ops = &hpfs_symlink_aops;
set_nlink(i, 1);
i->i_size = ea_size;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index ae4d5a1fa4c9..506765afa1a3 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -332,6 +332,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
result->i_blocks = 1;
set_nlink(result, 1);
result->i_size = strlen(symlink);
+ inode_nohighmem(result);
result->i_op = &page_symlink_inode_operations;
result->i_data.a_ops = &hpfs_symlink_aops;
@@ -500,7 +501,7 @@ out:
static int hpfs_symlink_readpage(struct file *file, struct page *page)
{
- char *link = kmap(page);
+ char *link = page_address(page);
struct inode *i = page->mapping->host;
struct fnode *fnode;
struct buffer_head *bh;
@@ -516,14 +517,12 @@ static int hpfs_symlink_readpage(struct file *file, struct page *page)
goto fail;
hpfs_unlock(i->i_sb);
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
fail:
hpfs_unlock(i->i_sb);
SetPageError(page);
- kunmap(page);
unlock_page(page);
return err;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 316adb968b65..d8f51ee8126b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -332,12 +332,17 @@ static void remove_huge_page(struct page *page)
* truncation is indicated by end of range being LLONG_MAX
* In this case, we first scan the range and release found pages.
* After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
- * maps and global counts.
+ * maps and global counts. Page faults cannot race with truncation
+ * in this routine: hugetlb_no_page() prevents faults in the truncated
+ * range by checking i_size before allocation, and again afterwards
+ * with the page table lock for the page held. The same lock must be
+ * acquired to unmap a page.
* hole punch is indicated if end is not LLONG_MAX
* In the hole punch case we scan the range and release found pages.
* Only when releasing a page is the associated region/reserv map
* deleted. The region/reserv map for ranges without associated
- * pages are not modified.
+ * pages are not modified. Page faults can race with hole punch;
+ * such a race is detected when we find a mapped page.
* Note: If the passed end of range value is beyond the end of file, but
* not LLONG_MAX this routine still performs a hole punch operation.
*/
@@ -361,46 +366,37 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
next = start;
while (next < end) {
/*
- * Make sure to never grab more pages that we
- * might possibly need.
+ * Don't grab more pages than the number left in the range.
*/
if (end - next < lookup_nr)
lookup_nr = end - next;
/*
- * This pagevec_lookup() may return pages past 'end',
- * so we must check for page->index > end.
+ * When no more pages are found, we are done.
*/
- if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) {
- if (next == start)
- break;
- next = start;
- continue;
- }
+ if (!pagevec_lookup(&pvec, mapping, next, lookup_nr))
+ break;
for (i = 0; i < pagevec_count(&pvec); ++i) {
struct page *page = pvec.pages[i];
u32 hash;
+ /*
+ * The page index could be beyond end. This is
+ * only possible in the hole punch case, since in
+ * the truncate case end is the maximum page offset.
+ */
+ next = page->index;
+ if (next >= end)
+ break;
+
hash = hugetlb_fault_mutex_hash(h, current->mm,
&pseudo_vma,
mapping, next, 0);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
lock_page(page);
- if (page->index >= end) {
- unlock_page(page);
- mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- next = end; /* we are done */
- break;
- }
-
- /*
- * If page is mapped, it was faulted in after being
- * unmapped. Do nothing in this race case. In the
- * normal case page is not mapped.
- */
- if (!page_mapped(page)) {
+ if (likely(!page_mapped(page))) {
bool rsv_on_error = !PagePrivate(page);
/*
* We must free the huge page and remove
@@ -421,17 +417,23 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
hugetlb_fix_reserve_counts(
inode, rsv_on_error);
}
+ } else {
+ /*
+ * If the page is mapped, it was faulted in after
+ * being unmapped, indicating a race between hole
+ * punch and page fault. Do nothing in that case.
+ * Getting here during a truncate operation is a
+ * bug.
+ */
+ BUG_ON(truncate_op);
}
- if (page->index > next)
- next = page->index;
-
- ++next;
unlock_page(page);
-
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
}
+ ++next;
huge_pagevec_release(&pvec);
+ cond_resched();
}
if (truncate_op)
@@ -647,9 +649,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
i_size_write(inode, offset + len);
inode->i_ctime = CURRENT_TIME;
- spin_lock(&inode->i_lock);
- inode->i_private = NULL;
- spin_unlock(&inode->i_lock);
out:
mutex_unlock(&inode->i_mutex);
return error;
@@ -761,6 +760,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
break;
case S_IFLNK:
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
break;
}
lockdep_annotate_inode_mutex_key(inode);
diff --git a/fs/inode.c b/fs/inode.c
index 1be5f9003eb3..5bb85a064ce7 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2028,3 +2028,9 @@ void inode_set_flags(struct inode *inode, unsigned int flags,
new_flags) != old_flags));
}
EXPORT_SYMBOL(inode_set_flags);
+
+void inode_nohighmem(struct inode *inode)
+{
+ mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
+}
+EXPORT_SYMBOL(inode_nohighmem);
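inode_nohighmem() masks __GFP_HIGHMEM out of the mapping's allocation mask (GFP_USER carries no highmem), so symlink pagecache pages are always directly addressable: the kmap()/kunmap() pairs deleted throughout this patch become plain page_address() calls, which is also what makes the RCU-mode page_get_link() possible. The recurring call pattern, as in the S_ISLNK branches above:

/* Sketch of the recurring S_ISLNK setup in this patch. */
static void example_init_symlink(struct inode *inode)
{
	inode->i_op = &page_symlink_inode_operations;
	inode_nohighmem(inode);	/* lowmem-only pagecache for the body */
	/* then point inode->i_mapping->a_ops at the fs's aops */
}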
diff --git a/fs/internal.h b/fs/internal.h
index 71859c4d0b41..e38c08ca437d 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -151,3 +151,10 @@ extern void mnt_pin_kill(struct mount *m);
* fs/nsfs.c
*/
extern struct dentry_operations ns_dentry_operations;
+
+/*
+ * fs/ioctl.c
+ */
+extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
+ unsigned long arg);
+extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 5d01d2638ca5..41c352e81193 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,6 +15,7 @@
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/falloc.h>
+#include "internal.h"
#include <asm/ioctls.h>
@@ -32,8 +33,7 @@
*
* Returns 0 on success, -errno on error.
*/
-static long vfs_ioctl(struct file *filp, unsigned int cmd,
- unsigned long arg)
+long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
int error = -ENOTTY;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index d67a16f2a45d..61abdc4920da 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1417,6 +1417,7 @@ static int isofs_read_inode(struct inode *inode, int relocated)
inode->i_fop = &isofs_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &isofs_symlink_aops;
} else
/* XXX - parse_rock_ridge_inode() had already set i_rdev. */
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 735d7522a3a9..5384ceb35b1c 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -687,7 +687,7 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
struct inode *inode = page->mapping->host;
struct iso_inode_info *ei = ISOFS_I(inode);
struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb);
- char *link = kmap(page);
+ char *link = page_address(page);
unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
struct buffer_head *bh;
char *rpnt = link;
@@ -774,7 +774,6 @@ repeat:
brelse(bh);
*rpnt = '\0';
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
@@ -791,7 +790,6 @@ fail:
brelse(bh);
error:
SetPageError(page);
- kunmap(page);
unlock_page(page);
return -EIO;
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 89463eee6791..ca181e81c765 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1009,7 +1009,8 @@ out:
}
/* Fast check whether buffer is already attached to the required transaction */
-static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh)
+static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh,
+ bool undo)
{
struct journal_head *jh;
bool ret = false;
@@ -1036,6 +1037,9 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh)
jh = READ_ONCE(bh->b_private);
if (!jh)
goto out;
+ /* For undo access, the buffer must have its committed data copied */
+ if (undo && !jh->b_committed_data)
+ goto out;
if (jh->b_transaction != handle->h_transaction &&
jh->b_next_transaction != handle->h_transaction)
goto out;
@@ -1073,7 +1077,7 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
struct journal_head *jh;
int rc;
- if (jbd2_write_access_granted(handle, bh))
+ if (jbd2_write_access_granted(handle, bh, false))
return 0;
jh = jbd2_journal_add_journal_head(bh);
@@ -1210,7 +1214,7 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
char *committed_data = NULL;
JBUFFER_TRACE(jh, "entry");
- if (jbd2_write_access_granted(handle, bh))
+ if (jbd2_write_access_granted(handle, bh, true))
return 0;
jh = jbd2_journal_add_journal_head(bh);
@@ -2152,6 +2156,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
if (!buffer_dirty(bh)) {
/* bdflush has written it. We can drop it now */
+ __jbd2_journal_remove_checkpoint(jh);
goto zap_buffer;
}
@@ -2181,6 +2186,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
/* The orphan record's transaction has
* committed. We can cleanse this buffer */
clear_buffer_jbddirty(bh);
+ __jbd2_journal_remove_checkpoint(jh);
goto zap_buffer;
}
}
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index bf12fe5f83d7..7a28facd7175 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -52,9 +52,6 @@ static int jffs2_security_getxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (!strcmp(name, ""))
- return -EINVAL;
-
return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_SECURITY,
name, buffer, size);
}
@@ -63,31 +60,12 @@ static int jffs2_security_setxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags)
{
- if (!strcmp(name, ""))
- return -EINVAL;
-
return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_SECURITY,
name, buffer, size, flags);
}
-static size_t jffs2_security_listxattr(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1;
-
- if (list && retlen <= list_size) {
- strcpy(list, XATTR_SECURITY_PREFIX);
- strcpy(list + XATTR_SECURITY_PREFIX_LEN, name);
- }
-
- return retlen;
-}
-
const struct xattr_handler jffs2_security_xattr_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = jffs2_security_listxattr,
.set = jffs2_security_setxattr,
.get = jffs2_security_getxattr
};
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index 8ce2f240125b..2cabd649d4fb 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -14,7 +14,7 @@
const struct inode_operations jffs2_symlink_inode_operations =
{
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = jffs2_setattr,
.setxattr = jffs2_setxattr,
.getxattr = jffs2_getxattr,
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 4c2c03663533..da3e18503c65 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -967,7 +967,8 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
struct jffs2_xattr_ref *ref, **pref;
struct jffs2_xattr_datum *xd;
const struct xattr_handler *xhandle;
- ssize_t len, rc;
+ const char *prefix;
+ ssize_t prefix_len, len, rc;
int retry = 0;
rc = check_xattr_ref_inode(c, ic);
@@ -998,18 +999,23 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
}
}
xhandle = xprefix_to_handler(xd->xprefix);
- if (!xhandle)
+ if (!xhandle || (xhandle->list && !xhandle->list(dentry)))
continue;
+ prefix = xhandle->prefix ?: xhandle->name;
+ prefix_len = strlen(prefix);
+ rc = prefix_len + xd->name_len + 1;
+
if (buffer) {
- rc = xhandle->list(xhandle, dentry, buffer + len,
- size - len, xd->xname,
- xd->name_len);
- } else {
- rc = xhandle->list(xhandle, dentry, NULL, 0,
- xd->xname, xd->name_len);
+ if (rc > size - len) {
+ rc = -ERANGE;
+ goto out;
+ }
+ memcpy(buffer, prefix, prefix_len);
+ buffer += prefix_len;
+ memcpy(buffer, xd->xname, xd->name_len);
+ buffer += xd->name_len;
+ *buffer++ = 0;
}
- if (rc < 0)
- goto out;
len += rc;
}
rc = len;
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c
index a562da0d6a26..b2555ef07a12 100644
--- a/fs/jffs2/xattr_trusted.c
+++ b/fs/jffs2/xattr_trusted.c
@@ -20,8 +20,6 @@ static int jffs2_trusted_getxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (!strcmp(name, ""))
- return -EINVAL;
return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_TRUSTED,
name, buffer, size);
}
@@ -30,28 +28,13 @@ static int jffs2_trusted_setxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags)
{
- if (!strcmp(name, ""))
- return -EINVAL;
return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_TRUSTED,
name, buffer, size, flags);
}
-static size_t jffs2_trusted_listxattr(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
+static bool jffs2_trusted_listxattr(struct dentry *dentry)
{
- size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN))
- return 0;
-
- if (list && retlen<=list_size) {
- strcpy(list, XATTR_TRUSTED_PREFIX);
- strcpy(list + XATTR_TRUSTED_PREFIX_LEN, name);
- }
-
- return retlen;
+ return capable(CAP_SYS_ADMIN);
}
const struct xattr_handler jffs2_trusted_xattr_handler = {
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c
index cbc0472e59a8..539bd630b5e4 100644
--- a/fs/jffs2/xattr_user.c
+++ b/fs/jffs2/xattr_user.c
@@ -20,8 +20,6 @@ static int jffs2_user_getxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (!strcmp(name, ""))
- return -EINVAL;
return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_USER,
name, buffer, size);
}
@@ -30,30 +28,12 @@ static int jffs2_user_setxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags)
{
- if (!strcmp(name, ""))
- return -EINVAL;
return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_USER,
name, buffer, size, flags);
}
-static size_t jffs2_user_listxattr(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1;
-
- if (list && retlen <= list_size) {
- strcpy(list, XATTR_USER_PREFIX);
- strcpy(list + XATTR_USER_PREFIX_LEN, name);
- }
-
- return retlen;
-}
-
const struct xattr_handler jffs2_user_xattr_handler = {
.prefix = XATTR_USER_PREFIX,
- .list = jffs2_user_listxattr,
.set = jffs2_user_setxattr,
.get = jffs2_user_getxattr
};
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 0c8ca830b113..49456853e9de 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -40,10 +40,10 @@ struct posix_acl *jfs_get_acl(struct inode *inode, int type)
switch(type) {
case ACL_TYPE_ACCESS:
- ea_name = POSIX_ACL_XATTR_ACCESS;
+ ea_name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- ea_name = POSIX_ACL_XATTR_DEFAULT;
+ ea_name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
return ERR_PTR(-EINVAL);
@@ -82,7 +82,7 @@ static int __jfs_set_acl(tid_t tid, struct inode *inode, int type,
switch (type) {
case ACL_TYPE_ACCESS:
- ea_name = POSIX_ACL_XATTR_ACCESS;
+ ea_name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
rc = posix_acl_equiv_mode(acl, &inode->i_mode);
if (rc < 0)
@@ -94,7 +94,7 @@ static int __jfs_set_acl(tid_t tid, struct inode *inode, int type,
}
break;
case ACL_TYPE_DEFAULT:
- ea_name = POSIX_ACL_XATTR_DEFAULT;
+ ea_name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
return -EINVAL;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 41aa3ca6a6a4..9d9bae63ae2a 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -60,6 +60,7 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
} else if (S_ISLNK(inode->i_mode)) {
if (inode->i_size >= IDATASIZE) {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &jfs_aops;
} else {
inode->i_op = &jfs_fast_symlink_inode_operations;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 9d7551f5c32a..701f89370de7 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -983,6 +983,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
jfs_info("jfs_symlink: allocate extent ip:0x%p", ip);
ip->i_op = &jfs_symlink_inode_operations;
+ inode_nohighmem(ip);
ip->i_mapping->a_ops = &jfs_aops;
/*
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index 5929e2363cb8..f8db4fde0b0b 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -23,7 +23,7 @@
const struct inode_operations jfs_fast_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = jfs_setattr,
.setxattr = jfs_setxattr,
.getxattr = jfs_getxattr,
@@ -33,8 +33,7 @@ const struct inode_operations jfs_fast_symlink_inode_operations = {
const struct inode_operations jfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = jfs_setattr,
.setxattr = jfs_setxattr,
.getxattr = jfs_getxattr,
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 756dd56aaf60..16405ae88d2d 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -205,7 +205,7 @@ int kernfs_iop_removexattr(struct dentry *dentry, const char *name)
if (!attrs)
return -ENOMEM;
- return simple_xattr_remove(&attrs->xattrs, name);
+ return simple_xattr_set(&attrs->xattrs, name, NULL, 0, XATTR_REPLACE);
}
ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf,
@@ -230,7 +230,7 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
if (!attrs)
return -ENOMEM;
- return simple_xattr_list(&attrs->xattrs, buf, size);
+ return simple_xattr_list(d_inode(dentry), &attrs->xattrs, buf, size);
}
static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index db272528ab5b..117b8b3416f9 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -112,18 +112,25 @@ static int kernfs_getlink(struct dentry *dentry, char *path)
return error;
}
-static const char *kernfs_iop_follow_link(struct dentry *dentry, void **cookie)
+static const char *kernfs_iop_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- int error = -ENOMEM;
- unsigned long page = get_zeroed_page(GFP_KERNEL);
- if (!page)
+ char *body;
+ int error;
+
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+ body = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!body)
return ERR_PTR(-ENOMEM);
- error = kernfs_getlink(dentry, (char *)page);
+ error = kernfs_getlink(dentry, body);
if (unlikely(error < 0)) {
- free_page((unsigned long)page);
+ kfree(body);
return ERR_PTR(error);
}
- return *cookie = (char *)page;
+ set_delayed_call(done, kfree_link, body);
+ return body;
}
const struct inode_operations kernfs_symlink_iops = {
@@ -132,8 +139,7 @@ const struct inode_operations kernfs_symlink_iops = {
.getxattr = kernfs_iop_getxattr,
.listxattr = kernfs_iop_listxattr,
.readlink = generic_readlink,
- .follow_link = kernfs_iop_follow_link,
- .put_link = free_page_put_link,
+ .get_link = kernfs_iop_get_link,
.setattr = kernfs_iop_setattr,
.getattr = kernfs_iop_getattr,
.permission = kernfs_iop_permission,
diff --git a/fs/libfs.c b/fs/libfs.c
index c7cbfb092e94..01491299f348 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1019,17 +1019,12 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
}
EXPORT_SYMBOL(noop_fsync);
-void kfree_put_link(struct inode *unused, void *cookie)
+/* Because kfree isn't assignment-compatible with void(void*) ;-/ */
+void kfree_link(void *p)
{
- kfree(cookie);
+ kfree(p);
}
-EXPORT_SYMBOL(kfree_put_link);
-
-void free_page_put_link(struct inode *unused, void *cookie)
-{
- free_page((unsigned long) cookie);
-}
-EXPORT_SYMBOL(free_page_put_link);
+EXPORT_SYMBOL(kfree_link);
/*
* nop .set_page_dirty method so that people can use .page_mkwrite on
@@ -1092,14 +1087,15 @@ simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
}
EXPORT_SYMBOL(simple_nosetlease);
-const char *simple_follow_link(struct dentry *dentry, void **cookie)
+const char *simple_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *done)
{
- return d_inode(dentry)->i_link;
+ return inode->i_link;
}
-EXPORT_SYMBOL(simple_follow_link);
+EXPORT_SYMBOL(simple_get_link);
const struct inode_operations simple_symlink_inode_operations = {
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.readlink = generic_readlink
};
EXPORT_SYMBOL(simple_symlink_inode_operations);
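simple_get_link() is the degenerate case: when the target string lives in ->i_link for the inode's whole lifetime, no destructor is registered at all, and get_link() in fs/namei.c short-circuits on a non-NULL ->i_link anyway. A sketch of a filesystem wiring that up; the caller must guarantee the buffer outlives the inode:

/* Sketch: target stored in ->i_link, freed with the inode by the fs. */
static void example_make_symlink(struct inode *inode, char *target)
{
	inode->i_link = target;
	inode->i_op = &simple_symlink_inode_operations;
}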
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index f9b45d46d4c4..542468e9bfb4 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -528,7 +528,8 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
return PTR_ERR(inode);
- inode->i_op = &logfs_symlink_iops;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &logfs_reg_aops;
return __logfs_create(dir, dentry, inode, target, destlen);
@@ -776,12 +777,6 @@ fail:
return -EIO;
}
-const struct inode_operations logfs_symlink_iops = {
- .readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
-};
-
const struct inode_operations logfs_dir_iops = {
.create = logfs_create,
.link = logfs_link,
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index af49e2d6941a..0fce46d62b9c 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -64,7 +64,8 @@ static void logfs_inode_setops(struct inode *inode)
inode->i_mapping->a_ops = &logfs_reg_aops;
break;
case S_IFLNK:
- inode->i_op = &logfs_symlink_iops;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &logfs_reg_aops;
break;
case S_IFSOCK: /* fall through */
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 5f0937609465..209a26d84c38 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -495,7 +495,6 @@ static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
#endif
/* dir.c */
-extern const struct inode_operations logfs_symlink_iops;
extern const struct inode_operations logfs_dir_iops;
extern const struct file_operations logfs_dir_fops;
int logfs_replay_journal(struct super_block *sb);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 086cd0a61e80..cb1789ca1ee6 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -435,8 +435,7 @@ static const struct address_space_operations minix_aops = {
static const struct inode_operations minix_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.getattr = minix_getattr,
};
@@ -452,6 +451,7 @@ void minix_set_inode(struct inode *inode, dev_t rdev)
inode->i_mapping->a_ops = &minix_aops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &minix_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &minix_aops;
} else
init_special_inode(inode, inode->i_mode, rdev);
diff --git a/fs/namei.c b/fs/namei.c
index d84d7c7515fc..3c909aebef70 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -505,13 +505,13 @@ struct nameidata {
int total_link_count;
struct saved {
struct path link;
- void *cookie;
+ struct delayed_call done;
const char *name;
- struct inode *inode;
unsigned seq;
} *stack, internal[EMBEDDED_LEVELS];
struct filename *name;
struct nameidata *saved;
+ struct inode *link_inode;
unsigned root_seq;
int dfd;
};
@@ -592,11 +592,8 @@ static void drop_links(struct nameidata *nd)
int i = nd->depth;
while (i--) {
struct saved *last = nd->stack + i;
- struct inode *inode = last->inode;
- if (last->cookie && inode->i_op->put_link) {
- inode->i_op->put_link(inode, last->cookie);
- last->cookie = NULL;
- }
+ do_delayed_call(&last->done);
+ clear_delayed_call(&last->done);
}
}
@@ -842,7 +839,7 @@ static inline void path_to_nameidata(const struct path *path,
}
/*
- * Helper to directly jump to a known parsed path from ->follow_link,
+ * Helper to directly jump to a known parsed path from ->get_link,
* caller must have taken a reference to path beforehand.
*/
void nd_jump_link(struct path *path)
@@ -858,9 +855,7 @@ void nd_jump_link(struct path *path)
static inline void put_link(struct nameidata *nd)
{
struct saved *last = nd->stack + --nd->depth;
- struct inode *inode = last->inode;
- if (last->cookie && inode->i_op->put_link)
- inode->i_op->put_link(inode, last->cookie);
+ do_delayed_call(&last->done);
if (!(nd->flags & LOOKUP_RCU))
path_put(&last->link);
}
@@ -892,7 +887,7 @@ static inline int may_follow_link(struct nameidata *nd)
return 0;
/* Allowed if owner and follower match. */
- inode = nd->stack[0].inode;
+ inode = nd->link_inode;
if (uid_eq(current_cred()->fsuid, inode->i_uid))
return 0;
@@ -983,7 +978,7 @@ const char *get_link(struct nameidata *nd)
{
struct saved *last = nd->stack + nd->depth - 1;
struct dentry *dentry = last->link.dentry;
- struct inode *inode = last->inode;
+ struct inode *inode = nd->link_inode;
int error;
const char *res;
@@ -1004,15 +999,21 @@ const char *get_link(struct nameidata *nd)
nd->last_type = LAST_BIND;
res = inode->i_link;
if (!res) {
+ const char * (*get)(struct dentry *, struct inode *,
+ struct delayed_call *);
+ get = inode->i_op->get_link;
if (nd->flags & LOOKUP_RCU) {
- if (unlikely(unlazy_walk(nd, NULL, 0)))
- return ERR_PTR(-ECHILD);
+ res = get(NULL, inode, &last->done);
+ if (res == ERR_PTR(-ECHILD)) {
+ if (unlikely(unlazy_walk(nd, NULL, 0)))
+ return ERR_PTR(-ECHILD);
+ res = get(dentry, inode, &last->done);
+ }
+ } else {
+ res = get(dentry, inode, &last->done);
}
- res = inode->i_op->follow_link(dentry, &last->cookie);
- if (IS_ERR_OR_NULL(res)) {
- last->cookie = NULL;
+ if (IS_ERR_OR_NULL(res))
return res;
- }
}
if (*res == '/') {
if (nd->flags & LOOKUP_RCU) {
@@ -1691,8 +1692,8 @@ static int pick_link(struct nameidata *nd, struct path *link,
last = nd->stack + nd->depth++;
last->link = *link;
- last->cookie = NULL;
- last->inode = inode;
+ clear_delayed_call(&last->done);
+ nd->link_inode = inode;
last->seq = seq;
return 1;
}
@@ -1996,7 +1997,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
nd->depth = 0;
- nd->total_link_count = 0;
if (flags & LOOKUP_ROOT) {
struct dentry *root = nd->root.dentry;
struct inode *inode = root->d_inode;
@@ -4496,72 +4496,73 @@ EXPORT_SYMBOL(readlink_copy);
/*
* A helper for ->readlink(). This should be used *ONLY* for symlinks that
- * have ->follow_link() touching nd only in nd_set_link(). Using (or not
- * using) it for any given inode is up to filesystem.
+ * have ->get_link() not calling nd_jump_link(). Using (or not using) it
+ * for any given inode is up to the filesystem.
*/
int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
- void *cookie;
+ DEFINE_DELAYED_CALL(done);
struct inode *inode = d_inode(dentry);
const char *link = inode->i_link;
int res;
if (!link) {
- link = inode->i_op->follow_link(dentry, &cookie);
+ link = inode->i_op->get_link(dentry, inode, &done);
if (IS_ERR(link))
return PTR_ERR(link);
}
res = readlink_copy(buffer, buflen, link);
- if (inode->i_op->put_link)
- inode->i_op->put_link(inode, cookie);
+ do_delayed_call(&done);
return res;
}
EXPORT_SYMBOL(generic_readlink);
/* get the link contents into pagecache */
-static char *page_getlink(struct dentry * dentry, struct page **ppage)
+const char *page_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *callback)
{
char *kaddr;
struct page *page;
- struct address_space *mapping = dentry->d_inode->i_mapping;
- page = read_mapping_page(mapping, 0, NULL);
- if (IS_ERR(page))
- return (char*)page;
- *ppage = page;
- kaddr = kmap(page);
- nd_terminate_link(kaddr, dentry->d_inode->i_size, PAGE_SIZE - 1);
+ struct address_space *mapping = inode->i_mapping;
+
+ if (!dentry) {
+ page = find_get_page(mapping, 0);
+ if (!page)
+ return ERR_PTR(-ECHILD);
+ if (!PageUptodate(page)) {
+ put_page(page);
+ return ERR_PTR(-ECHILD);
+ }
+ } else {
+ page = read_mapping_page(mapping, 0, NULL);
+ if (IS_ERR(page))
+ return (char*)page;
+ }
+ set_delayed_call(callback, page_put_link, page);
+ BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM);
+ kaddr = page_address(page);
+ nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1);
return kaddr;
}
-int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
-{
- struct page *page = NULL;
- int res = readlink_copy(buffer, buflen, page_getlink(dentry, &page));
- if (page) {
- kunmap(page);
- page_cache_release(page);
- }
- return res;
-}
-EXPORT_SYMBOL(page_readlink);
+EXPORT_SYMBOL(page_get_link);
-const char *page_follow_link_light(struct dentry *dentry, void **cookie)
+void page_put_link(void *arg)
{
- struct page *page = NULL;
- char *res = page_getlink(dentry, &page);
- if (!IS_ERR(res))
- *cookie = page;
- return res;
+ put_page(arg);
}
-EXPORT_SYMBOL(page_follow_link_light);
+EXPORT_SYMBOL(page_put_link);
-void page_put_link(struct inode *unused, void *cookie)
+int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
- struct page *page = cookie;
- kunmap(page);
- page_cache_release(page);
+ DEFINE_DELAYED_CALL(done);
+ int res = readlink_copy(buffer, buflen,
+ page_get_link(dentry, d_inode(dentry),
+ &done));
+ do_delayed_call(&done);
+ return res;
}
-EXPORT_SYMBOL(page_put_link);
+EXPORT_SYMBOL(page_readlink);
/*
* The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
@@ -4572,7 +4573,6 @@ int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
struct page *page;
void *fsdata;
int err;
- char *kaddr;
unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE;
if (nofs)
flags |= AOP_FLAG_NOFS;
@@ -4583,9 +4583,7 @@ retry:
if (err)
goto fail;
- kaddr = kmap_atomic(page);
- memcpy(kaddr, symname, len-1);
- kunmap_atomic(kaddr);
+ memcpy(page_address(page), symname, len-1);
err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
page, fsdata);
@@ -4610,7 +4608,6 @@ EXPORT_SYMBOL(page_symlink);
const struct inode_operations page_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
};
EXPORT_SYMBOL(page_symlink_inode_operations);
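Outside of pathwalk, callers of the new API keep a delayed_call on the stack and fire it once they are done with the body, exactly as generic_readlink() does above. A hedged caller-side sketch, assuming a symlink inode whose ->get_link is non-NULL:

static int example_print_link(struct dentry *dentry)
{
	DEFINE_DELAYED_CALL(done);
	struct inode *inode = d_inode(dentry);
	const char *link = inode->i_link;

	if (!link) {
		link = inode->i_op->get_link(dentry, inode, &done);
		if (IS_ERR(link))
			return PTR_ERR(link);
	}
	pr_info("target: %s\n", link);
	do_delayed_call(&done);	/* kfree_link/page_put_link, as registered */
	return 0;
}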
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 9605a2f63549..ce1eb3f9dfe8 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -244,8 +244,7 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
static const struct inode_operations ncp_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = ncp_notify_change,
};
#endif
@@ -283,6 +282,7 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &ncp_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &ncp_symlink_aops;
#endif
} else {
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 79b113048eac..0a3f9b594602 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -525,6 +525,8 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
switch (rqdata.cmd) {
case NCP_LOCK_EX:
case NCP_LOCK_SH:
+ if (rqdata.timeout < 0)
+ return -EINVAL;
if (rqdata.timeout == 0)
rqdata.timeout = NCP_LOCK_DEFAULT_TIMEOUT;
else if (rqdata.timeout > NCP_LOCK_MAX_TIMEOUT)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 326d9e10d833..bdb4dc7b4ecd 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -75,11 +75,11 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
* nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
* @word: long word containing the bit lock
*/
-int nfs_wait_bit_killable(struct wait_bit_key *key)
+int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
- if (fatal_signal_pending(current))
- return -ERESTARTSYS;
freezable_schedule_unsafe();
+ if (signal_pending_state(mode, current))
+ return -ERESTARTSYS;
return 0;
}
EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
@@ -408,9 +408,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
inode->i_fop = NULL;
inode->i_flags |= S_AUTOMOUNT;
}
- } else if (S_ISLNK(inode->i_mode))
+ } else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &nfs_symlink_inode_operations;
- else
+ inode_nohighmem(inode);
+ } else
init_special_inode(inode, inode->i_mode, fattr->rdev);
memset(&inode->i_atime, 0, sizeof(inode->i_atime));
@@ -618,7 +619,10 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
nfs_vmtruncate(inode, attr->ia_size);
}
- nfs_update_inode(inode, fattr);
+ if (fattr->valid)
+ nfs_update_inode(inode, fattr);
+ else
+ NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR;
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
@@ -1083,6 +1087,27 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
|| NFS_STALE(inode);
}
+int nfs_revalidate_mapping_rcu(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long *bitlock = &nfsi->flags;
+ int ret = 0;
+
+ if (IS_SWAPFILE(inode))
+ goto out;
+ if (nfs_mapping_need_revalidate_inode(inode)) {
+ ret = -ECHILD;
+ goto out;
+ }
+ spin_lock(&inode->i_lock);
+ if (test_bit(NFS_INO_INVALIDATING, bitlock) ||
+ (nfsi->cache_validity & NFS_INO_INVALID_DATA))
+ ret = -ECHILD;
+ spin_unlock(&inode->i_lock);
+out:
+ return ret;
+}
+
/**
* __nfs_revalidate_mapping - Revalidate the pagecache
* @inode - pointer to host inode
@@ -1824,7 +1849,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
nfsi->attr_gencount = fattr->gencount;
}
- invalid &= ~NFS_INO_INVALID_ATTR;
+
+ /* Don't declare attrcache up to date if there were no attrs! */
+ if (fattr->valid != 0)
+ invalid &= ~NFS_INO_INVALID_ATTR;
+
/* Don't invalidate the data if we were to blame */
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
|| S_ISLNK(inode->i_mode)))
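
The wait-bit action callback gains the sleep mode as a second argument, and the signal test moves after the schedule so a signal delivered during the sleep is seen on wakeup; signal_pending_state() keeps TASK_KILLABLE waits responding only to fatal signals. A sketch of the new action shape (my_wait_killable is a hypothetical name):

    #include <linux/wait.h>
    #include <linux/sched.h>
    #include <linux/freezer.h>
    #include <linux/errno.h>

    static int my_wait_killable(struct wait_bit_key *key, int mode)
    {
            freezable_schedule_unsafe();             /* sleep first... */
            if (signal_pending_state(mode, current)) /* ...then test, so a signal
                                                        delivered while sleeping
                                                        is not lost */
                    return -ERESTARTSYS;
            return 0;
    }
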
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 56cfde26fb9c..9dea85f7f918 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -379,7 +379,7 @@ extern int nfs_drop_inode(struct inode *);
extern void nfs_clear_inode(struct inode *);
extern void nfs_evict_inode(struct inode *);
void nfs_zap_acl_cache(struct inode *inode);
-extern int nfs_wait_bit_killable(struct wait_bit_key *key);
+extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
/* super.c */
extern const struct super_operations nfs_sops;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 1ebe2fc7cda2..17c0fa1eccfa 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -284,12 +284,12 @@ nfs3_listxattr(struct dentry *dentry, char *data, size_t size)
int error;
error = nfs3_list_one_acl(inode, ACL_TYPE_ACCESS,
- POSIX_ACL_XATTR_ACCESS, data, size, &result);
+ XATTR_NAME_POSIX_ACL_ACCESS, data, size, &result);
if (error)
return error;
error = nfs3_list_one_acl(inode, ACL_TYPE_DEFAULT,
- POSIX_ACL_XATTR_DEFAULT, data, size, &result);
+ XATTR_NAME_POSIX_ACL_DEFAULT, data, size, &result);
if (error)
return error;
return result;
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 3e92a3cde15d..6b1ce9825430 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -14,7 +14,7 @@
#include "pnfs.h"
#include "internal.h"
-#define NFSDBG_FACILITY NFSDBG_PNFS
+#define NFSDBG_FACILITY NFSDBG_PROC
static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
fmode_t fmode)
@@ -284,6 +284,7 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
.dst_fh = NFS_FH(dst_inode),
.src_offset = src_offset,
.dst_offset = dst_offset,
+ .count = count,
.dst_bitmask = server->cache_consistency_bitmask,
};
struct nfs42_clone_res res = {
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 223bedda64ae..10410e8b5853 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -33,7 +33,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
return ret;
idr_preload(GFP_KERNEL);
spin_lock(&nn->nfs_client_lock);
- ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT);
+ ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT);
if (ret >= 0)
clp->cl_cb_ident = ret;
spin_unlock(&nn->nfs_client_lock);
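
Starting the allocation at 1 reserves ident 0 to mean "no callback ident assigned"; idr_alloc()'s third argument is the minimum ID. A minimal sketch of the convention (the locking the real caller holds is omitted here):

    #include <linux/idr.h>

    static int alloc_cb_ident(struct idr *idr, void *clp)
    {
            int id;

            idr_preload(GFP_KERNEL);
            /* minimum ID 1: keep 0 free to mean "unset" */
            id = idr_alloc(idr, clp, 1, 0, GFP_NOWAIT);
            idr_preload_end();
            return id;      /* >= 1 on success, negative errno on failure */
    }
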
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 4aa571956cd6..db9b5fea5b3e 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -7,6 +7,7 @@
#include <linux/file.h>
#include <linux/falloc.h>
#include <linux/nfs_fs.h>
+#include <uapi/linux/btrfs.h> /* BTRFS_IOC_CLONE/BTRFS_IOC_CLONE_RANGE */
#include "delegation.h"
#include "internal.h"
#include "iostat.h"
@@ -203,6 +204,7 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
struct fd src_file;
struct inode *src_inode;
unsigned int bs = server->clone_blksize;
+ bool same_inode = false;
int ret;
/* dst file must be opened for writing */
@@ -221,10 +223,8 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
src_inode = file_inode(src_file.file);
- /* src and dst must be different files */
- ret = -EINVAL;
if (src_inode == dst_inode)
- goto out_fput;
+ same_inode = true;
/* src file must be opened for reading */
if (!(src_file.file->f_mode & FMODE_READ))
@@ -249,8 +249,16 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
goto out_fput;
}
+ /* check whether the ranges overlap within the same file */
+ if (same_inode) {
+ if (dst_off + count > src_off && dst_off < src_off + count)
+ goto out_fput;
+ }
+
/* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
- if (dst_inode < src_inode) {
+ if (same_inode) {
+ mutex_lock(&src_inode->i_mutex);
+ } else if (dst_inode < src_inode) {
mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD);
} else {
@@ -275,7 +283,9 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
truncate_inode_pages_range(&dst_inode->i_data, dst_off, dst_off + count - 1);
out_unlock:
- if (dst_inode < src_inode) {
+ if (same_inode) {
+ mutex_unlock(&src_inode->i_mutex);
+ } else if (dst_inode < src_inode) {
mutex_unlock(&src_inode->i_mutex);
mutex_unlock(&dst_inode->i_mutex);
} else {
@@ -291,46 +301,31 @@ out_drop_write:
static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp)
{
- struct nfs_ioctl_clone_range_args args;
+ struct btrfs_ioctl_clone_range_args args;
if (copy_from_user(&args, argp, sizeof(args)))
return -EFAULT;
- return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_off, args.dst_off, args.count);
-}
-#else
-static long nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
- u64 src_off, u64 dst_off, u64 count)
-{
- return -ENOTTY;
-}
-
-static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp)
-{
- return -ENOTTY;
+ return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_offset,
+ args.dest_offset, args.src_length);
}
-#endif /* CONFIG_NFS_V4_2 */
long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
switch (cmd) {
- case NFS_IOC_CLONE:
+ case BTRFS_IOC_CLONE:
return nfs42_ioctl_clone(file, arg, 0, 0, 0);
- case NFS_IOC_CLONE_RANGE:
+ case BTRFS_IOC_CLONE_RANGE:
return nfs42_ioctl_clone_range(file, argp);
}
return -ENOTTY;
}
+#endif /* CONFIG_NFS_V4_2 */
const struct file_operations nfs4_file_operations = {
-#ifdef CONFIG_NFS_V4_2
- .llseek = nfs4_file_llseek,
-#else
- .llseek = nfs_file_llseek,
-#endif
.read_iter = nfs_file_read,
.write_iter = nfs_file_write,
.mmap = nfs_file_mmap,
@@ -342,14 +337,14 @@ const struct file_operations nfs4_file_operations = {
.flock = nfs_flock,
.splice_read = nfs_file_splice_read,
.splice_write = iter_file_splice_write,
-#ifdef CONFIG_NFS_V4_2
- .fallocate = nfs42_fallocate,
-#endif /* CONFIG_NFS_V4_2 */
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NFS_V4_2
+ .llseek = nfs4_file_llseek,
+ .fallocate = nfs42_fallocate,
.unlocked_ioctl = nfs4_ioctl,
-#else
.compat_ioctl = nfs4_ioctl,
-#endif /* CONFIG_COMPAT */
+#else
+ .llseek = nfs_file_llseek,
+#endif
};
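
Cloning within one file is now permitted as long as the byte ranges are disjoint, and locking degenerates to a single i_mutex in the same-inode case (taking it twice would deadlock), while distinct inodes are still ordered by address to preserve a global lock order. Both decisions as a standalone sketch:

    #include <linux/fs.h>
    #include <linux/mutex.h>

    /* do half-open ranges [src, src+count) and [dst, dst+count) intersect? */
    static bool ranges_overlap(u64 src_off, u64 dst_off, u64 count)
    {
            return dst_off + count > src_off && dst_off < src_off + count;
    }

    static void lock_pair(struct inode *a, struct inode *b)
    {
            if (a == b) {
                    mutex_lock(&a->i_mutex);    /* same inode: lock once */
            } else if (a < b) {                 /* address order = global order */
                    mutex_lock_nested(&a->i_mutex, I_MUTEX_PARENT);
                    mutex_lock_nested(&b->i_mutex, I_MUTEX_CHILD);
            } else {
                    mutex_lock_nested(&b->i_mutex, I_MUTEX_PARENT);
                    mutex_lock_nested(&a->i_mutex, I_MUTEX_CHILD);
            }
    }
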
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 765a03559363..c57d1332c1c8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6253,9 +6253,6 @@ static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler,
const void *buf, size_t buflen,
int flags)
{
- if (strcmp(key, "") != 0)
- return -EINVAL;
-
return nfs4_proc_set_acl(d_inode(dentry), buf, buflen);
}
@@ -6263,32 +6260,15 @@ static int nfs4_xattr_get_nfs4_acl(const struct xattr_handler *handler,
struct dentry *dentry, const char *key,
void *buf, size_t buflen)
{
- if (strcmp(key, "") != 0)
- return -EINVAL;
-
return nfs4_proc_get_acl(d_inode(dentry), buf, buflen);
}
-static size_t nfs4_xattr_list_nfs4_acl(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_len, const char *name,
- size_t name_len)
+static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry)
{
- size_t len = sizeof(XATTR_NAME_NFSV4_ACL);
-
- if (!nfs4_server_supports_acls(NFS_SERVER(d_inode(dentry))))
- return 0;
-
- if (list && len <= list_len)
- memcpy(list, XATTR_NAME_NFSV4_ACL, len);
- return len;
+ return nfs4_server_supports_acls(NFS_SERVER(d_inode(dentry)));
}
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
-static inline int nfs4_server_supports_labels(struct nfs_server *server)
-{
- return server->caps & NFS_CAP_SECURITY_LABEL;
-}
static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler,
struct dentry *dentry, const char *key,
@@ -6310,29 +6290,34 @@ static int nfs4_xattr_get_nfs4_label(const struct xattr_handler *handler,
return -EOPNOTSUPP;
}
-static size_t nfs4_xattr_list_nfs4_label(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_len, const char *name,
- size_t name_len)
+static ssize_t
+nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
{
- size_t len = 0;
+ int len = 0;
- if (nfs_server_capable(d_inode(dentry), NFS_CAP_SECURITY_LABEL)) {
- len = security_inode_listsecurity(d_inode(dentry), NULL, 0);
- if (list && len <= list_len)
- security_inode_listsecurity(d_inode(dentry), list, len);
+ if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) {
+ len = security_inode_listsecurity(inode, list, list_len);
+ if (list_len && len > list_len)
+ return -ERANGE;
}
return len;
}
static const struct xattr_handler nfs4_xattr_nfs4_label_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = nfs4_xattr_list_nfs4_label,
.get = nfs4_xattr_get_nfs4_label,
.set = nfs4_xattr_set_nfs4_label,
};
-#endif
+#else
+
+static ssize_t
+nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
+{
+ return 0;
+}
+
+#endif
/*
* nfs_fhget will use either the mounted_on_fileid or the fileid
@@ -7866,7 +7851,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
spin_unlock(&inode->i_lock);
goto out_restart;
}
- if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
+ if (nfs4_async_handle_error(task, server, state, &lgp->timeout) == -EAGAIN)
goto out_restart;
out:
dprintk("<-- %s\n", __func__);
@@ -8749,6 +8734,24 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
#endif
};
+ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+ ssize_t error, error2;
+
+ error = generic_listxattr(dentry, list, size);
+ if (error < 0)
+ return error;
+ if (list) {
+ list += error;
+ size -= error;
+ }
+
+ error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size);
+ if (error2 < 0)
+ return error2;
+ return error + error2;
+}
+
static const struct inode_operations nfs4_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_lookup,
@@ -8765,7 +8768,7 @@ static const struct inode_operations nfs4_dir_inode_operations = {
.setattr = nfs_setattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
- .listxattr = generic_listxattr,
+ .listxattr = nfs4_listxattr,
.removexattr = generic_removexattr,
};
@@ -8775,7 +8778,7 @@ static const struct inode_operations nfs4_file_inode_operations = {
.setattr = nfs_setattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
- .listxattr = generic_listxattr,
+ .listxattr = nfs4_listxattr,
.removexattr = generic_removexattr,
};
@@ -8834,7 +8837,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
};
static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
- .prefix = XATTR_NAME_NFSV4_ACL,
+ .name = XATTR_NAME_NFSV4_ACL,
.list = nfs4_xattr_list_nfs4_acl,
.get = nfs4_xattr_get_nfs4_acl,
.set = nfs4_xattr_set_nfs4_acl,
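
With ->list() reduced to a visibility predicate, generic_listxattr() copies names out of the handler table itself, and nfs4_listxattr() appends a second producer (the security label) by advancing the buffer past the generic output. The chaining pattern in isolation (extra_listxattr is a hypothetical stand-in for nfs4_listxattr_nfs4_label):

    #include <linux/dcache.h>
    #include <linux/xattr.h>

    static ssize_t extra_listxattr(struct inode *inode, char *list, size_t size);

    static ssize_t chain_listxattr(struct dentry *dentry, char *list, size_t size)
    {
            ssize_t n1, n2;

            n1 = generic_listxattr(dentry, list, size);
            if (n1 < 0)
                    return n1;
            if (list) {             /* NULL list means "just report the size" */
                    list += n1;
                    size -= n1;
            }
            n2 = extra_listxattr(d_inode(dentry), list, size);
            if (n2 < 0)
                    return n2;
            return n1 + n2;
    }
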
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index dfed4f5c8fcc..4e4441216804 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3615,6 +3615,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
status = 0;
if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS)))
goto out;
+ bitmap[0] &= ~FATTR4_WORD0_FS_LOCATIONS;
status = -EIO;
/* Ignore broken servers that return unrequested attrs */
if (unlikely(res == NULL))
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 5c0c6b58157f..9aebffb40505 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -476,10 +476,7 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
}
unlock_page(page);
}
- if (PageDirty(page) || PageWriteback(page))
- *uptodate = true;
- else
- *uptodate = PageUptodate(page);
+ *uptodate = PageUptodate(page);
dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate);
return page;
}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index fe3ddd20ff89..452a011ba0d8 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -129,7 +129,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c)
set_bit(NFS_IO_INPROGRESS, &c->flags);
if (atomic_read(&c->io_count) == 0)
break;
- ret = nfs_wait_bit_killable(&q.key);
+ ret = nfs_wait_bit_killable(&q.key, TASK_KILLABLE);
} while (atomic_read(&c->io_count) != 0 && !ret);
finish_wait(wq, &q.wait);
return ret;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 93496c059837..bec0384499f7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -872,33 +872,38 @@ send_layoutget(struct pnfs_layout_hdr *lo,
dprintk("--> %s\n", __func__);
- lgp = kzalloc(sizeof(*lgp), gfp_flags);
- if (lgp == NULL)
- return NULL;
+ /*
+ * Synchronously retrieve layout information from server and
+ * store in lseg. If we race with a concurrent seqid morphing
+ * op, then re-send the LAYOUTGET.
+ */
+ do {
+ lgp = kzalloc(sizeof(*lgp), gfp_flags);
+ if (lgp == NULL)
+ return NULL;
+
+ i_size = i_size_read(ino);
+
+ lgp->args.minlength = PAGE_CACHE_SIZE;
+ if (lgp->args.minlength > range->length)
+ lgp->args.minlength = range->length;
+ if (range->iomode == IOMODE_READ) {
+ if (range->offset >= i_size)
+ lgp->args.minlength = 0;
+ else if (i_size - range->offset < lgp->args.minlength)
+ lgp->args.minlength = i_size - range->offset;
+ }
+ lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
+ lgp->args.range = *range;
+ lgp->args.type = server->pnfs_curr_ld->id;
+ lgp->args.inode = ino;
+ lgp->args.ctx = get_nfs_open_context(ctx);
+ lgp->gfp_flags = gfp_flags;
+ lgp->cred = lo->plh_lc_cred;
- i_size = i_size_read(ino);
+ lseg = nfs4_proc_layoutget(lgp, gfp_flags);
+ } while (lseg == ERR_PTR(-EAGAIN));
- lgp->args.minlength = PAGE_CACHE_SIZE;
- if (lgp->args.minlength > range->length)
- lgp->args.minlength = range->length;
- if (range->iomode == IOMODE_READ) {
- if (range->offset >= i_size)
- lgp->args.minlength = 0;
- else if (i_size - range->offset < lgp->args.minlength)
- lgp->args.minlength = i_size - range->offset;
- }
- lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
- lgp->args.range = *range;
- lgp->args.type = server->pnfs_curr_ld->id;
- lgp->args.inode = ino;
- lgp->args.ctx = get_nfs_open_context(ctx);
- lgp->gfp_flags = gfp_flags;
- lgp->cred = lo->plh_lc_cred;
-
- /* Synchronously retrieve layout information from server and
- * store in lseg.
- */
- lseg = nfs4_proc_layoutget(lgp, gfp_flags);
if (IS_ERR(lseg)) {
switch (PTR_ERR(lseg)) {
case -ENOMEM:
@@ -1461,11 +1466,11 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
}
/* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */
-static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key)
+static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key, int mode)
{
if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags))
return 1;
- return nfs_wait_bit_killable(key);
+ return nfs_wait_bit_killable(key, mode);
}
static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
@@ -1687,6 +1692,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
/* existing state ID, make sure the sequence number matches. */
if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
dprintk("%s forget reply due to sequence\n", __func__);
+ status = -EAGAIN;
goto out_forget_reply;
}
pnfs_set_layout_stateid(lo, &res->stateid, false);
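
send_layoutget() now treats a stateid seqid race, surfaced as -EAGAIN from pnfs_layout_process(), by rebuilding the arguments and reissuing the RPC; that is why the argument setup moved inside the loop. The skeleton of the allocate-fill-issue-retry loop (all names here are hypothetical):

    #include <linux/slab.h>
    #include <linux/err.h>

    struct lgp_args { int dummy; };     /* stand-in for nfs4_layoutget */
    struct lseg;
    static void fill_args(struct lgp_args *args);
    static struct lseg *issue_once(struct lgp_args *args);  /* consumes args */

    static struct lseg *layoutget_with_retry(gfp_t gfp_flags)
    {
            struct lgp_args *args;
            struct lseg *lseg;

            do {
                    args = kzalloc(sizeof(*args), gfp_flags);
                    if (!args)
                            return NULL;
                    fill_args(args);            /* recomputed fresh per attempt */
                    lseg = issue_once(args);
            } while (lseg == ERR_PTR(-EAGAIN)); /* seqid race: go again */
            return lseg;
    }
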
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index b6de433da5db..4fe3eead3868 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -42,21 +42,35 @@ error:
return -EIO;
}
-static const char *nfs_follow_link(struct dentry *dentry, void **cookie)
+static const char *nfs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
struct page *page;
void *err;
- err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
- if (err)
- return err;
- page = read_cache_page(&inode->i_data, 0,
- (filler_t *)nfs_symlink_filler, inode);
- if (IS_ERR(page))
- return ERR_CAST(page);
- *cookie = page;
- return kmap(page);
+ if (!dentry) {
+ err = ERR_PTR(nfs_revalidate_mapping_rcu(inode));
+ if (err)
+ return err;
+ page = find_get_page(inode->i_mapping, 0);
+ if (!page)
+ return ERR_PTR(-ECHILD);
+ if (!PageUptodate(page)) {
+ put_page(page);
+ return ERR_PTR(-ECHILD);
+ }
+ } else {
+ err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
+ if (err)
+ return err;
+ page = read_cache_page(&inode->i_data, 0,
+ (filler_t *)nfs_symlink_filler, inode);
+ if (IS_ERR(page))
+ return ERR_CAST(page);
+ }
+ set_delayed_call(done, page_put_link, page);
+ return page_address(page);
}
/*
@@ -64,8 +78,7 @@ static const char *nfs_follow_link(struct dentry *dentry, void **cookie)
*/
const struct inode_operations nfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = nfs_follow_link,
- .put_link = page_put_link,
+ .get_link = nfs_get_link,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
};
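
A NULL dentry tells ->get_link() it is being called during an RCU (lockless) path walk, where nothing may sleep: the link body must already be cached and uptodate, and anything else returns -ECHILD so the walker can retry in ref-walk mode. A skeleton of the split (example_filler is hypothetical):

    #include <linux/fs.h>
    #include <linux/pagemap.h>
    #include <linux/err.h>

    static int example_filler(void *data, struct page *page);  /* hypothetical */

    static const char *example_get_link(struct dentry *dentry,
                                        struct inode *inode,
                                        struct delayed_call *done)
    {
            struct page *page;

            if (!dentry) {                  /* RCU walk: may not sleep */
                    page = find_get_page(inode->i_mapping, 0);
                    if (!page)
                            return ERR_PTR(-ECHILD);
                    if (!PageUptodate(page)) {
                            put_page(page);
                            return ERR_PTR(-ECHILD);  /* retry in ref-walk */
                    }
            } else {                        /* ref-walk: blocking read is fine */
                    page = read_cache_page(inode->i_mapping, 0,
                                           example_filler, inode);
                    if (IS_ERR(page))
                            return ERR_CAST(page);
            }
            set_delayed_call(done, page_put_link, page);
            return page_address(page);      /* lowmem via inode_nohighmem() */
    }
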
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 9ffef06b30d5..c9d6c715c0fb 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -616,6 +616,7 @@ nfsd4_cb_layout_prepare(struct nfsd4_callback *cb)
mutex_lock(&ls->ls_mutex);
nfs4_inc_and_copy_stateid(&ls->ls_recall_sid, &ls->ls_stid);
+ mutex_unlock(&ls->ls_mutex);
}
static int
@@ -659,7 +660,6 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb)
trace_layout_recall_release(&ls->ls_stid.sc_stateid);
- mutex_unlock(&ls->ls_mutex);
nfsd4_return_all_layouts(ls, &reaplist);
nfsd4_free_layouts(&reaplist);
nfs4_put_stid(&ls->ls_stid);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index ac2f64943ff4..10b22527a617 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -510,6 +510,7 @@ static int __nilfs_read_inode(struct super_block *sb,
inode->i_mapping->a_ops = &nilfs_aops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &nilfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &nilfs_aops;
} else {
inode->i_op = &nilfs_special_inode_operations;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index c9a1a491aa91..7ccdb961eea9 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -161,6 +161,7 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry,
/* slow symlink */
inode->i_op = &nilfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &nilfs_aops;
err = page_symlink(inode, symname, l);
if (err)
@@ -568,8 +569,7 @@ const struct inode_operations nilfs_special_inode_operations = {
const struct inode_operations nilfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.permission = nilfs_permission,
};
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index ce38b4ccc9ab..84f2f8079466 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2843,6 +2843,8 @@ again:
res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
if (!ret)
BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
+ else
+ res->migration_pending = 0;
spin_unlock(&res->spinlock);
/*
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 8f87e05ee25d..97a563bab9a8 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -361,6 +361,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
break;
case S_IFLNK:
inode->i_op = &ocfs2_symlink_inode_operations;
+ inode_nohighmem(inode);
i_size_write(inode, le64_to_cpu(fe->i_size));
break;
default:
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index 652ece4a9d9e..d56f0079b858 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -67,7 +67,10 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode,
*/
locks_lock_file_wait(file,
- &(struct file_lock){.fl_type = F_UNLCK});
+ &(struct file_lock) {
+ .fl_type = F_UNLCK,
+ .fl_flags = FL_FLOCK
+ });
ocfs2_file_unlock(file);
}
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 3b48ac25d8a7..afb81eae2c18 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -367,7 +367,7 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
- status = posix_acl_create(dir, &mode, &default_acl, &acl);
+ status = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
if (status) {
mlog_errno(status);
goto leave;
@@ -1958,6 +1958,7 @@ static int ocfs2_symlink(struct inode *dir,
inode->i_rdev = 0;
newsize = l - 1;
inode->i_op = &ocfs2_symlink_inode_operations;
+ inode_nohighmem(inode);
if (l > ocfs2_fast_symlink_chars(sb)) {
u32 offset = 0;
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index d5da6f624142..79b8021302b3 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -54,11 +54,12 @@
static u16 ocfs2_calc_new_backup_super(struct inode *inode,
struct ocfs2_group_desc *gd,
u16 cl_cpg,
+ u16 old_bg_clusters,
int set)
{
int i;
u16 backups = 0;
- u32 cluster;
+ u32 cluster, lgd_cluster;
u64 blkno, gd_blkno, lgd_blkno = le64_to_cpu(gd->bg_blkno);
for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
@@ -71,6 +72,12 @@ static u16 ocfs2_calc_new_backup_super(struct inode *inode,
else if (gd_blkno > lgd_blkno)
break;
+ /* check if this backup super has already been done */
+ lgd_cluster = ocfs2_blocks_to_clusters(inode->i_sb, lgd_blkno);
+ lgd_cluster += old_bg_clusters;
+ if (lgd_cluster >= cluster)
+ continue;
+
if (set)
ocfs2_set_bit(cluster % cl_cpg,
(unsigned long *)gd->bg_bitmap);
@@ -99,6 +106,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
u16 chain, num_bits, backups = 0;
u16 cl_bpc = le16_to_cpu(cl->cl_bpc);
u16 cl_cpg = le16_to_cpu(cl->cl_cpg);
+ u16 old_bg_clusters;
trace_ocfs2_update_last_group_and_inode(new_clusters,
first_new_cluster);
@@ -112,6 +120,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
group = (struct ocfs2_group_desc *)group_bh->b_data;
+ old_bg_clusters = le16_to_cpu(group->bg_bits) / cl_bpc;
/* update the group first. */
num_bits = new_clusters * cl_bpc;
le16_add_cpu(&group->bg_bits, num_bits);
@@ -125,7 +134,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
OCFS2_FEATURE_COMPAT_BACKUP_SB)) {
backups = ocfs2_calc_new_backup_super(bm_inode,
group,
- cl_cpg, 1);
+ cl_cpg, old_bg_clusters, 1);
le16_add_cpu(&group->bg_free_bits_count, -1 * backups);
}
@@ -163,7 +172,7 @@ out_rollback:
if (ret < 0) {
ocfs2_calc_new_backup_super(bm_inode,
group,
- cl_cpg, 0);
+ cl_cpg, old_bg_clusters, 0);
le16_add_cpu(&group->bg_free_bits_count, backups);
le16_add_cpu(&group->bg_bits, -1 * num_bits);
le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits);
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 66edce7ecfd7..6c2a3e3c521c 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -88,8 +88,7 @@ const struct address_space_operations ocfs2_fast_symlink_aops = {
const struct inode_operations ocfs2_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.getattr = ocfs2_getattr,
.setattr = ocfs2_setattr,
.setxattr = generic_setxattr,
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e9164f09841b..f0e241ffd94f 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -544,8 +544,7 @@ static inline const char *ocfs2_xattr_prefix(int name_index)
if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
handler = ocfs2_xattr_handler_map[name_index];
-
- return handler ? handler->prefix : NULL;
+ return handler ? xattr_prefix(handler) : NULL;
}
static u32 ocfs2_xattr_name_hash(struct inode *inode,
@@ -884,14 +883,39 @@ static int ocfs2_xattr_value_truncate(struct inode *inode,
return ret;
}
-static int ocfs2_xattr_list_entry(char *buffer, size_t size,
- size_t *result, const char *prefix,
+static int ocfs2_xattr_list_entry(struct super_block *sb,
+ char *buffer, size_t size,
+ size_t *result, int type,
const char *name, int name_len)
{
char *p = buffer + *result;
- int prefix_len = strlen(prefix);
- int total_len = prefix_len + name_len + 1;
+ const char *prefix;
+ int prefix_len;
+ int total_len;
+ switch(type) {
+ case OCFS2_XATTR_INDEX_USER:
+ if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
+ return 0;
+ break;
+
+ case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS:
+ case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT:
+ if (!(sb->s_flags & MS_POSIXACL))
+ return 0;
+ break;
+
+ case OCFS2_XATTR_INDEX_TRUSTED:
+ if (!capable(CAP_SYS_ADMIN))
+ return 0;
+ break;
+ }
+
+ prefix = ocfs2_xattr_prefix(type);
+ if (!prefix)
+ return 0;
+ prefix_len = strlen(prefix);
+ total_len = prefix_len + name_len + 1;
*result += total_len;
/* we are just looking for how big our buffer needs to be */
@@ -914,23 +938,20 @@ static int ocfs2_xattr_list_entries(struct inode *inode,
{
size_t result = 0;
int i, type, ret;
- const char *prefix, *name;
+ const char *name;
for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
type = ocfs2_xattr_get_type(entry);
- prefix = ocfs2_xattr_prefix(type);
-
- if (prefix) {
- name = (const char *)header +
- le16_to_cpu(entry->xe_name_offset);
+ name = (const char *)header +
+ le16_to_cpu(entry->xe_name_offset);
- ret = ocfs2_xattr_list_entry(buffer, buffer_size,
- &result, prefix, name,
- entry->xe_name_len);
- if (ret)
- return ret;
- }
+ ret = ocfs2_xattr_list_entry(inode->i_sb,
+ buffer, buffer_size,
+ &result, type, name,
+ entry->xe_name_len);
+ if (ret)
+ return ret;
}
return result;
@@ -4033,32 +4054,30 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
int ret = 0, type;
struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
int i, block_off, new_offset;
- const char *prefix, *name;
+ const char *name;
for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
type = ocfs2_xattr_get_type(entry);
- prefix = ocfs2_xattr_prefix(type);
- if (prefix) {
- ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
- bucket_xh(bucket),
- i,
- &block_off,
- &new_offset);
- if (ret)
- break;
+ ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
+ bucket_xh(bucket),
+ i,
+ &block_off,
+ &new_offset);
+ if (ret)
+ break;
- name = (const char *)bucket_block(bucket, block_off) +
- new_offset;
- ret = ocfs2_xattr_list_entry(xl->buffer,
- xl->buffer_size,
- &xl->result,
- prefix, name,
- entry->xe_name_len);
- if (ret)
- break;
- }
+ name = (const char *)bucket_block(bucket, block_off) +
+ new_offset;
+ ret = ocfs2_xattr_list_entry(inode->i_sb,
+ xl->buffer,
+ xl->buffer_size,
+ &xl->result,
+ type, name,
+ entry->xe_name_len);
+ if (ret)
+ break;
}
return ret;
@@ -7226,31 +7245,14 @@ int ocfs2_init_security_and_acl(struct inode *dir,
leave:
return ret;
}
+
/*
* 'security' attributes support
*/
-static size_t ocfs2_xattr_security_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
- memcpy(list + prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
static int ocfs2_xattr_security_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY,
name, buffer, size);
}
@@ -7259,9 +7261,6 @@ static int ocfs2_xattr_security_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY,
name, value, size, flags);
}
@@ -7314,7 +7313,6 @@ int ocfs2_init_security_set(handle_t *handle,
const struct xattr_handler ocfs2_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = ocfs2_xattr_security_list,
.get = ocfs2_xattr_security_get,
.set = ocfs2_xattr_security_set,
};
@@ -7322,31 +7320,10 @@ const struct xattr_handler ocfs2_xattr_security_handler = {
/*
* 'trusted' attributes support
*/
-static size_t ocfs2_xattr_trusted_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
- memcpy(list + prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED,
name, buffer, size);
}
@@ -7355,16 +7332,12 @@ static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED,
name, value, size, flags);
}
const struct xattr_handler ocfs2_xattr_trusted_handler = {
.prefix = XATTR_TRUSTED_PREFIX,
- .list = ocfs2_xattr_trusted_list,
.get = ocfs2_xattr_trusted_get,
.set = ocfs2_xattr_trusted_set,
};
@@ -7372,34 +7345,12 @@ const struct xattr_handler ocfs2_xattr_trusted_handler = {
/*
* 'user' attributes support
*/
-static size_t ocfs2_xattr_user_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- const size_t prefix_len = XATTR_USER_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
- struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
-
- if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_USER_PREFIX, prefix_len);
- memcpy(list + prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
static int ocfs2_xattr_user_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
return -EOPNOTSUPP;
return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_USER, name,
@@ -7412,8 +7363,6 @@ static int ocfs2_xattr_user_set(const struct xattr_handler *handler,
{
struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
return -EOPNOTSUPP;
@@ -7423,7 +7372,6 @@ static int ocfs2_xattr_user_set(const struct xattr_handler *handler,
const struct xattr_handler ocfs2_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
- .list = ocfs2_xattr_user_list,
.get = ocfs2_xattr_user_get,
.set = ocfs2_xattr_user_set,
};
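
The per-namespace visibility checks that used to live in the handlers' ->list() callbacks are folded into ocfs2_xattr_list_entry(), keyed on the name index; the buffer format stays "prefix + name + NUL", with a pure size query when the buffer is NULL. A sketch of that emission step:

    #include <linux/string.h>
    #include <linux/errno.h>

    static int emit_xattr_name(char *buf, size_t size, size_t *pos,
                               const char *prefix, const char *name,
                               int name_len)
    {
            int prefix_len = strlen(prefix);
            int total = prefix_len + name_len + 1;
            char *p = buf ? buf + *pos : NULL;

            *pos += total;          /* always counted, even in size-only mode */
            if (!buf)
                    return 0;       /* pure size query */
            if (*pos > size)
                    return -ERANGE; /* caller's buffer too small */
            memcpy(p, prefix, prefix_len);
            memcpy(p + prefix_len, name, name_len);
            p[prefix_len + name_len] = '\0';
            return 0;
    }
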
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 871fcb67be97..0a8983492d91 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -195,8 +195,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
struct dentry *dentry, struct path *lowerpath,
- struct kstat *stat, struct iattr *attr,
- const char *link)
+ struct kstat *stat, const char *link)
{
struct inode *wdir = workdir->d_inode;
struct inode *udir = upperdir->d_inode;
@@ -240,8 +239,6 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
mutex_lock(&newdentry->d_inode->i_mutex);
err = ovl_set_attr(newdentry, stat);
- if (!err && attr)
- err = notify_change(newdentry, attr, NULL);
mutex_unlock(&newdentry->d_inode->i_mutex);
if (err)
goto out_cleanup;
@@ -286,8 +283,7 @@ out_cleanup:
* that point the file will have already been copied up anyway.
*/
int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
- struct path *lowerpath, struct kstat *stat,
- struct iattr *attr)
+ struct path *lowerpath, struct kstat *stat)
{
struct dentry *workdir = ovl_workdir(dentry);
int err;
@@ -345,26 +341,19 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
}
upperdentry = ovl_dentry_upper(dentry);
if (upperdentry) {
- unlock_rename(workdir, upperdir);
+ /* Raced with another copy-up? Nothing to do, then... */
err = 0;
- /* Raced with another copy-up? Do the setattr here */
- if (attr) {
- mutex_lock(&upperdentry->d_inode->i_mutex);
- err = notify_change(upperdentry, attr, NULL);
- mutex_unlock(&upperdentry->d_inode->i_mutex);
- }
- goto out_put_cred;
+ goto out_unlock;
}
err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
- stat, attr, link);
+ stat, link);
if (!err) {
/* Restore timestamps on parent (best effort) */
ovl_set_timestamps(upperdir, &pstat);
}
out_unlock:
unlock_rename(workdir, upperdir);
-out_put_cred:
revert_creds(old_cred);
put_cred(override_cred);
@@ -406,7 +395,7 @@ int ovl_copy_up(struct dentry *dentry)
ovl_path_lower(next, &lowerpath);
err = vfs_getattr(&lowerpath, &stat);
if (!err)
- err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL);
+ err = ovl_copy_up_one(parent, next, &lowerpath, &stat);
dput(parent);
dput(next);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index ec0c2a050043..964a60fa7afc 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -12,8 +12,7 @@
#include <linux/xattr.h>
#include "overlayfs.h"
-static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr,
- bool no_data)
+static int ovl_copy_up_truncate(struct dentry *dentry)
{
int err;
struct dentry *parent;
@@ -30,10 +29,8 @@ static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr,
if (err)
goto out_dput_parent;
- if (no_data)
- stat.size = 0;
-
- err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr);
+ stat.size = 0;
+ err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat);
out_dput_parent:
dput(parent);
@@ -49,13 +46,13 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
if (err)
goto out;
- upperdentry = ovl_dentry_upper(dentry);
- if (upperdentry) {
+ err = ovl_copy_up(dentry);
+ if (!err) {
+ upperdentry = ovl_dentry_upper(dentry);
+
mutex_lock(&upperdentry->d_inode->i_mutex);
err = notify_change(upperdentry, attr, NULL);
mutex_unlock(&upperdentry->d_inode->i_mutex);
- } else {
- err = ovl_copy_up_last(dentry, attr, false);
}
ovl_drop_write(dentry);
out:
@@ -134,57 +131,23 @@ out_dput:
return err;
}
-
-struct ovl_link_data {
- struct dentry *realdentry;
- void *cookie;
-};
-
-static const char *ovl_follow_link(struct dentry *dentry, void **cookie)
+static const char *ovl_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
struct dentry *realdentry;
struct inode *realinode;
- struct ovl_link_data *data = NULL;
- const char *ret;
+
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
realdentry = ovl_dentry_real(dentry);
realinode = realdentry->d_inode;
- if (WARN_ON(!realinode->i_op->follow_link))
+ if (WARN_ON(!realinode->i_op->get_link))
return ERR_PTR(-EPERM);
- if (realinode->i_op->put_link) {
- data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
- if (!data)
- return ERR_PTR(-ENOMEM);
- data->realdentry = realdentry;
- }
-
- ret = realinode->i_op->follow_link(realdentry, cookie);
- if (IS_ERR_OR_NULL(ret)) {
- kfree(data);
- return ret;
- }
-
- if (data)
- data->cookie = *cookie;
-
- *cookie = data;
-
- return ret;
-}
-
-static void ovl_put_link(struct inode *unused, void *c)
-{
- struct inode *realinode;
- struct ovl_link_data *data = c;
-
- if (!data)
- return;
-
- realinode = data->realdentry->d_inode;
- realinode->i_op->put_link(realinode, data->cookie);
- kfree(data);
+ return realinode->i_op->get_link(realdentry, realinode, done);
}
static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
@@ -353,7 +316,7 @@ struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags)
return ERR_PTR(err);
if (file_flags & O_TRUNC)
- err = ovl_copy_up_last(dentry, NULL, true);
+ err = ovl_copy_up_truncate(dentry);
else
err = ovl_copy_up(dentry);
ovl_drop_write(dentry);
@@ -381,8 +344,7 @@ static const struct inode_operations ovl_file_inode_operations = {
static const struct inode_operations ovl_symlink_inode_operations = {
.setattr = ovl_setattr,
- .follow_link = ovl_follow_link,
- .put_link = ovl_put_link,
+ .get_link = ovl_get_link,
.readlink = ovl_readlink,
.getattr = ovl_getattr,
.setxattr = ovl_setxattr,
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index ea5a40b06e3a..e17154aeaae4 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -194,7 +194,6 @@ void ovl_cleanup(struct inode *dir, struct dentry *dentry);
/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
- struct path *lowerpath, struct kstat *stat,
- struct iattr *attr);
+ struct path *lowerpath, struct kstat *stat);
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 4adde1e2cbec..711dd5170376 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -769,8 +769,6 @@ posix_acl_xattr_get(const struct xattr_handler *handler,
struct posix_acl *acl;
int error;
- if (strcmp(name, "") != 0)
- return -EINVAL;
if (!IS_POSIXACL(d_backing_inode(dentry)))
return -EOPNOTSUPP;
if (d_is_symlink(dentry))
@@ -797,8 +795,6 @@ posix_acl_xattr_set(const struct xattr_handler *handler,
struct posix_acl *acl = NULL;
int ret;
- if (strcmp(name, "") != 0)
- return -EINVAL;
if (!IS_POSIXACL(inode))
return -EOPNOTSUPP;
if (!inode->i_op->set_acl)
@@ -827,25 +823,14 @@ out:
return ret;
}
-static size_t
-posix_acl_xattr_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool
+posix_acl_xattr_list(struct dentry *dentry)
{
- const char *xname = handler->prefix;
- size_t size;
-
- if (!IS_POSIXACL(d_backing_inode(dentry)))
- return 0;
-
- size = strlen(xname) + 1;
- if (list && size <= list_size)
- memcpy(list, xname, size);
- return size;
+ return IS_POSIXACL(d_backing_inode(dentry));
}
const struct xattr_handler posix_acl_access_xattr_handler = {
- .prefix = POSIX_ACL_XATTR_ACCESS,
+ .name = XATTR_NAME_POSIX_ACL_ACCESS,
.flags = ACL_TYPE_ACCESS,
.list = posix_acl_xattr_list,
.get = posix_acl_xattr_get,
@@ -854,7 +839,7 @@ const struct xattr_handler posix_acl_access_xattr_handler = {
EXPORT_SYMBOL_GPL(posix_acl_access_xattr_handler);
const struct xattr_handler posix_acl_default_xattr_handler = {
- .prefix = POSIX_ACL_XATTR_DEFAULT,
+ .name = XATTR_NAME_POSIX_ACL_DEFAULT,
.flags = ACL_TYPE_DEFAULT,
.list = posix_acl_xattr_list,
.get = posix_acl_xattr_get,
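
The ACL handlers switch from .prefix to the new .name field: a .name handler matches one attribute exactly, and its get/set callbacks see an empty remainder, which is why the strcmp(name, "") guards above could be deleted. A sketch of the matching rule, assuming xattr_prefix() semantics where .name is used when .prefix is unset:

    #include <linux/xattr.h>
    #include <linux/string.h>

    static bool handler_matches(const struct xattr_handler *h, const char *attr)
    {
            if (h->name)            /* whole-name handler: exact match */
                    return strcmp(h->name, attr) == 0;
            /* prefix handler: match the namespace, suffix goes to get/set */
            return strncmp(h->prefix, attr, strlen(h->prefix)) == 0;
    }
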
diff --git a/fs/proc/base.c b/fs/proc/base.c
index bd3e9e68125b..55e01f88eac9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1564,12 +1564,16 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
return -ENOENT;
}
-static const char *proc_pid_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_pid_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
struct path path;
int error = -EACCES;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
/* Are we allowed to snoop on the tasks file descriptors? */
if (!proc_fd_access_allowed(inode))
goto out;
@@ -1630,7 +1634,7 @@ out:
const struct inode_operations proc_pid_link_inode_operations = {
.readlink = proc_pid_readlink,
- .follow_link = proc_pid_follow_link,
+ .get_link = proc_pid_get_link,
.setattr = proc_setattr,
};
@@ -1895,7 +1899,7 @@ static const struct dentry_operations tid_map_files_dentry_operations = {
.d_delete = pid_delete_dentry,
};
-static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
+static int map_files_get_link(struct dentry *dentry, struct path *path)
{
unsigned long vm_start, vm_end;
struct vm_area_struct *vma;
@@ -1945,20 +1949,22 @@ struct map_files_info {
* path to the file in question.
*/
static const char *
-proc_map_files_follow_link(struct dentry *dentry, void **cookie)
+proc_map_files_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
- return proc_pid_follow_link(dentry, NULL);
+ return proc_pid_get_link(dentry, inode, done);
}
/*
- * Identical to proc_pid_link_inode_operations except for follow_link()
+ * Identical to proc_pid_link_inode_operations except for get_link()
*/
static const struct inode_operations proc_map_files_link_inode_operations = {
.readlink = proc_pid_readlink,
- .follow_link = proc_map_files_follow_link,
+ .get_link = proc_map_files_get_link,
.setattr = proc_setattr,
};
@@ -1975,7 +1981,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
return -ENOENT;
ei = PROC_I(inode);
- ei->op.proc_get_link = proc_map_files_get_link;
+ ei->op.proc_get_link = map_files_get_link;
inode->i_op = &proc_map_files_link_inode_operations;
inode->i_size = 64;
@@ -2494,6 +2500,7 @@ static ssize_t proc_coredump_filter_write(struct file *file,
mm = get_task_mm(task);
if (!mm)
goto out_no_mm;
+ ret = 0;
for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
if (val & mask)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index bd95b9fdebb0..d0e9b9b6223e 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -393,24 +393,25 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
};
#endif
-static const char *proc_follow_link(struct dentry *dentry, void **cookie)
+static void proc_put_link(void *p)
{
- struct proc_dir_entry *pde = PDE(d_inode(dentry));
- if (unlikely(!use_pde(pde)))
- return ERR_PTR(-EINVAL);
- *cookie = pde;
- return pde->data;
+ unuse_pde(p);
}
-static void proc_put_link(struct inode *unused, void *p)
+static const char *proc_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- unuse_pde(p);
+ struct proc_dir_entry *pde = PDE(inode);
+ if (unlikely(!use_pde(pde)))
+ return ERR_PTR(-EINVAL);
+ set_delayed_call(done, proc_put_link, pde);
+ return pde->data;
}
const struct inode_operations proc_link_inode_operations = {
.readlink = generic_readlink,
- .follow_link = proc_follow_link,
- .put_link = proc_put_link,
+ .get_link = proc_get_link,
};
struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
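
proc_get_link() shows the general cleanup contract: instead of a separate ->put_link(), the filesystem registers a destructor and its argument on the delayed_call, and the VFS invokes it once the link body is no longer needed. The helper pair introduced for this is small enough to quote; this is the shape of <linux/delayed_call.h> as added by the series:

    struct delayed_call {
            void (*fn)(void *);
            void *arg;
    };

    static inline void set_delayed_call(struct delayed_call *call,
                                        void (*fn)(void *), void *arg)
    {
            call->fn = fn;
            call->arg = arg;
    }

    static inline void do_delayed_call(struct delayed_call *call)
    {
            if (call->fn)
                    call->fn(call->arg);    /* e.g. proc_put_link(pde) */
    }
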
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index f6e8354b8cea..1dece8781f91 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -30,14 +30,18 @@ static const struct proc_ns_operations *ns_entries[] = {
&mntns_operations,
};
-static const char *proc_ns_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_ns_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
struct task_struct *task;
struct path ns_path;
void *error = ERR_PTR(-EACCES);
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
task = get_proc_task(inode);
if (!task)
return error;
@@ -74,7 +78,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
static const struct inode_operations proc_ns_link_inode_operations = {
.readlink = proc_ns_readlink,
- .follow_link = proc_ns_follow_link,
+ .get_link = proc_ns_get_link,
.setattr = proc_setattr,
};
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 113b8d061fc0..67e8db442cf0 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -18,26 +18,28 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
return readlink_copy(buffer, buflen, tmp);
}
-static const char *proc_self_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_self_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+ struct pid_namespace *ns = inode->i_sb->s_fs_info;
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name;
if (!tgid)
return ERR_PTR(-ENOENT);
/* 11 for max length of signed int in decimal + NULL term */
- name = kmalloc(12, GFP_KERNEL);
- if (!name)
- return ERR_PTR(-ENOMEM);
+ name = kmalloc(12, dentry ? GFP_KERNEL : GFP_ATOMIC);
+ if (unlikely(!name))
+ return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD);
sprintf(name, "%d", tgid);
- return *cookie = name;
+ set_delayed_call(done, kfree_link, name);
+ return name;
}
static const struct inode_operations proc_self_inode_operations = {
.readlink = proc_self_readlink,
- .follow_link = proc_self_follow_link,
- .put_link = kfree_put_link,
+ .get_link = proc_self_get_link,
};
static unsigned self_inum;
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index 947b0f4fd0a1..9eacd59e0360 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -19,26 +19,29 @@ static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer,
return readlink_copy(buffer, buflen, tmp);
}
-static const char *proc_thread_self_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_thread_self_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+ struct pid_namespace *ns = inode->i_sb->s_fs_info;
pid_t tgid = task_tgid_nr_ns(current, ns);
pid_t pid = task_pid_nr_ns(current, ns);
char *name;
if (!pid)
return ERR_PTR(-ENOENT);
- name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL);
- if (!name)
- return ERR_PTR(-ENOMEM);
+ name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF,
+ dentry ? GFP_KERNEL : GFP_ATOMIC);
+ if (unlikely(!name))
+ return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD);
sprintf(name, "%d/task/%d", tgid, pid);
- return *cookie = name;
+ set_delayed_call(done, kfree_link, name);
+ return name;
}
static const struct inode_operations proc_thread_self_inode_operations = {
.readlink = proc_thread_self_readlink,
- .follow_link = proc_thread_self_follow_link,
- .put_link = kfree_put_link,
+ .get_link = proc_thread_self_get_link,
};
static unsigned thread_self_inum;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index c4bcb778886e..f37b3deb01b4 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -316,6 +316,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
inode->i_fop = &qnx4_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &qnx4_aops;
qnx4_i(inode)->mmu_private = inode->i_size;
} else {
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 32d2e1a9774c..9728b5499e1d 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -582,6 +582,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
inode->i_mapping->a_ops = &qnx6_aops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &qnx6_aops;
} else
init_special_inode(inode, inode->i_mode, 0);
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 889d558b4e05..38981b037524 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -79,6 +79,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
break;
case S_IFLNK:
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
break;
}
}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 3d8e7e671d5b..ae9e5b308cf9 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1361,6 +1361,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
inode->i_fop = &reiserfs_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &reiserfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &reiserfs_address_space_operations;
} else {
inode->i_blocks = 0;
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 47f96988fdd4..2a12d46d7fb4 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1170,6 +1170,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
reiserfs_update_inode_transaction(parent_dir);
inode->i_op = &reiserfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &reiserfs_address_space_operations;
retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name,
@@ -1664,8 +1665,7 @@ const struct inode_operations reiserfs_dir_inode_operations = {
*/
const struct inode_operations reiserfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = reiserfs_setattr,
.setxattr = reiserfs_setxattr,
.getxattr = reiserfs_getxattr,
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 66b26fdfff8d..e5ddb4e5ea94 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -756,7 +756,8 @@ find_xattr_handler_prefix(const struct xattr_handler **handlers,
return NULL;
for_each_xattr_handler(handlers, xah) {
- if (strncmp(xah->prefix, name, strlen(xah->prefix)) == 0)
+ const char *prefix = xattr_prefix(xah);
+ if (strncmp(prefix, name, strlen(prefix)) == 0)
break;
}
@@ -839,19 +840,16 @@ static int listxattr_filler(struct dir_context *ctx, const char *name,
handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
name);
- if (!handler) /* Unsupported xattr name */
+ if (!handler /* Unsupported xattr name */ ||
+ (handler->list && !handler->list(b->dentry)))
return 0;
+ size = namelen + 1;
if (b->buf) {
- size = handler->list(handler, b->dentry,
- b->buf + b->pos, b->size, name,
- namelen);
if (size > b->size)
return -ERANGE;
- } else {
- size = handler->list(handler, b->dentry,
- NULL, 0, name, namelen);
+ memcpy(b->buf + b->pos, name, namelen);
+ b->buf[b->pos + namelen] = 0;
}
-
b->pos += size;
}
return 0;
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 4b34b9dc03dd..558a16beaacb 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -186,10 +186,10 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
BUG();
@@ -244,7 +244,7 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
error = posix_acl_equiv_mode(acl, &inode->i_mode);
if (error < 0)
@@ -256,7 +256,7 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
}
break;
case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
if (!S_ISDIR(inode->i_mode))
return acl ? -EACCES : 0;
break;
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index ac659af431ae..ab0217d32039 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -34,21 +34,9 @@ security_set(const struct xattr_handler *handler, struct dentry *dentry,
return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
}
-static size_t security_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_len,
- const char *name, size_t namelen)
+static bool security_list(struct dentry *dentry)
{
- const size_t len = namelen + 1;
-
- if (IS_PRIVATE(d_inode(dentry)))
- return 0;
-
- if (list && len <= list_len) {
- memcpy(list, name, namelen);
- list[namelen] = '\0';
- }
-
- return len;
+ return !IS_PRIVATE(d_inode(dentry));
}
/* Initializes the security context for a new inode and returns the number
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a338adf1b8b4..64b67aa643a9 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -33,20 +33,9 @@ trusted_set(const struct xattr_handler *handler, struct dentry *dentry,
return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
}
-static size_t trusted_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool trusted_list(struct dentry *dentry)
{
- const size_t len = name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry)))
- return 0;
-
- if (list && len <= list_size) {
- memcpy(list, name, name_len);
- list[name_len] = '\0';
- }
- return len;
+ return capable(CAP_SYS_ADMIN) && !IS_PRIVATE(d_inode(dentry));
}
const struct xattr_handler reiserfs_xattr_trusted_handler = {
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 39c9667191c5..12e6306f562a 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -30,19 +30,9 @@ user_set(const struct xattr_handler *handler, struct dentry *dentry,
return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
}
-static size_t user_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool user_list(struct dentry *dentry)
{
- const size_t len = name_len + 1;
-
- if (!reiserfs_xattrs_user(dentry->d_sb))
- return 0;
- if (list && len <= list_size) {
- memcpy(list, name, name_len);
- list[name_len] = '\0';
- }
- return len;
+ return reiserfs_xattrs_user(dentry->d_sb);
}
const struct xattr_handler reiserfs_xattr_user_handler = {
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 268733cda397..bb894e78a821 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -360,6 +360,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
break;
case ROMFH_SYM:
i->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(i);
i->i_data.a_ops = &romfs_aops;
mode |= S_IRWXUGO;
break;
diff --git a/fs/splice.c b/fs/splice.c
index 801c21cd77fe..4cf700d50b40 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -809,6 +809,13 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
*/
static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
{
+ /*
+ * Check for signal early to make process killable when there are
+ * always buffers available
+ */
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+
while (!pipe->nrbufs) {
if (!pipe->writers)
return 0;
@@ -884,6 +891,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
splice_from_pipe_begin(sd);
do {
+ cond_resched();
ret = splice_from_pipe_next(pipe, sd);
if (ret > 0)
ret = splice_from_pipe_feed(pipe, sd, actor);
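
Two liveness fixes in one file: checking for signals before waiting keeps the task killable even when pipe buffers are always available, and cond_resched() in the outer loop stops a long splice from monopolizing the CPU. The resulting loop pattern (consume_one is hypothetical):

    #include <linux/sched.h>
    #include <linux/pipe_fs_i.h>
    #include <linux/splice.h>
    #include <linux/errno.h>

    static int consume_one(struct pipe_inode_info *pipe, struct splice_desc *sd);

    static int pump(struct pipe_inode_info *pipe, struct splice_desc *sd)
    {
            int ret;

            do {
                    cond_resched();              /* don't hog the CPU */
                    if (signal_pending(current)) /* stay killable even when
                                                    data is always ready */
                            return -ERESTARTSYS;
                    ret = consume_one(pipe, sd);
            } while (ret > 0);
            return ret;
    }
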
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index a1ce5ce60632..0927b1e80ab6 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -41,6 +41,7 @@
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/xattr.h>
+#include <linux/pagemap.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -291,6 +292,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
inode->i_size = le32_to_cpu(sqsh_ino->symlink_size);
inode->i_op = &squashfs_symlink_inode_ops;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &squashfs_symlink_aops;
inode->i_mode |= S_IFLNK;
squashfs_i(inode)->start = block;
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c
index 12806dffb345..dbcc2f54bad4 100644
--- a/fs/squashfs/symlink.c
+++ b/fs/squashfs/symlink.c
@@ -119,8 +119,7 @@ const struct address_space_operations squashfs_symlink_aops = {
const struct inode_operations squashfs_symlink_inode_ops = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.getxattr = generic_getxattr,
.listxattr = squashfs_listxattr
};
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c
index 6a4cc344085c..1e9de96288d8 100644
--- a/fs/squashfs/xattr.c
+++ b/fs/squashfs/xattr.c
@@ -58,7 +58,7 @@ ssize_t squashfs_listxattr(struct dentry *d, char *buffer,
struct squashfs_xattr_entry entry;
struct squashfs_xattr_val val;
const struct xattr_handler *handler;
- int name_size, prefix_size = 0;
+ int name_size;
err = squashfs_read_metadata(sb, &entry, &start, &offset,
sizeof(entry));
@@ -67,15 +67,16 @@ ssize_t squashfs_listxattr(struct dentry *d, char *buffer,
name_size = le16_to_cpu(entry.size);
handler = squashfs_xattr_handler(le16_to_cpu(entry.type));
- if (handler)
- prefix_size = handler->list(handler, d, buffer, rest,
- NULL, name_size);
- if (prefix_size) {
+ if (handler && (!handler->list || handler->list(d))) {
+ const char *prefix = handler->prefix ?: handler->name;
+ size_t prefix_size = strlen(prefix);
+
if (buffer) {
if (prefix_size + name_size + 1 > rest) {
err = -ERANGE;
goto failed;
}
+ memcpy(buffer, prefix, prefix_size);
buffer += prefix_size;
}
err = squashfs_read_metadata(sb, buffer, &start,
@@ -212,25 +213,10 @@ failed:
}
-static size_t squashfs_xattr_handler_list(const struct xattr_handler *handler,
- struct dentry *d, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- int len = strlen(handler->prefix);
-
- if (list && len <= list_size)
- memcpy(list, handler->prefix, len);
- return len;
-}
-
static int squashfs_xattr_handler_get(const struct xattr_handler *handler,
struct dentry *d, const char *name,
void *buffer, size_t size)
{
- if (name[0] == '\0')
- return -EINVAL;
-
return squashfs_xattr_get(d_inode(d), handler->flags, name,
buffer, size);
}
@@ -241,22 +227,15 @@ static int squashfs_xattr_handler_get(const struct xattr_handler *handler,
static const struct xattr_handler squashfs_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
.flags = SQUASHFS_XATTR_USER,
- .list = squashfs_xattr_handler_list,
.get = squashfs_xattr_handler_get
};
/*
* Trusted namespace support
*/
-static size_t squashfs_trusted_xattr_handler_list(const struct xattr_handler *handler,
- struct dentry *d, char *list,
- size_t list_size, const char *name,
- size_t name_len)
+static bool squashfs_trusted_xattr_handler_list(struct dentry *d)
{
- if (!capable(CAP_SYS_ADMIN))
- return 0;
- return squashfs_xattr_handler_list(handler, d, list, list_size, name,
- name_len);
+ return capable(CAP_SYS_ADMIN);
}
static const struct xattr_handler squashfs_xattr_trusted_handler = {
@@ -272,7 +251,6 @@ static const struct xattr_handler squashfs_xattr_trusted_handler = {
static const struct xattr_handler squashfs_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.flags = SQUASHFS_XATTR_SECURITY,
- .list = squashfs_xattr_handler_list,
.get = squashfs_xattr_handler_get
};
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 590ad9206e3f..07ac18c355e7 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -146,8 +146,7 @@ static inline void write3byte(struct sysv_sb_info *sbi,
static const struct inode_operations sysv_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.getattr = sysv_getattr,
};
@@ -162,15 +161,9 @@ void sysv_set_inode(struct inode *inode, dev_t rdev)
inode->i_fop = &sysv_dir_operations;
inode->i_mapping->a_ops = &sysv_aops;
} else if (S_ISLNK(inode->i_mode)) {
- if (inode->i_blocks) {
- inode->i_op = &sysv_symlink_inode_operations;
- inode->i_mapping->a_ops = &sysv_aops;
- } else {
- inode->i_op = &simple_symlink_inode_operations;
- inode->i_link = (char *)SYSV_I(inode)->i_data;
- nd_terminate_link(inode->i_link, inode->i_size,
- sizeof(SYSV_I(inode)->i_data) - 1);
- }
+ inode->i_op = &sysv_symlink_inode_operations;
+ inode_nohighmem(inode);
+ inode->i_mapping->a_ops = &sysv_aops;
} else
init_special_inode(inode, inode->i_mode, rdev);
}
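sysv is one of several filesystems converted the same way in this series (romfs and squashfs above, udf and ufs below): page-cache-backed symlinks switch to the generic page_symlink_inode_operations plus inode_nohighmem(). Keeping symlink pages out of highmem is what allows page_get_link() and fillers such as udf_symlink_filler() to use page_address() instead of kmap()/kunmap(). The recurring shape, sketched for a hypothetical filesystem:

static void examplefs_init_symlink(struct inode *inode)
{
	inode->i_op = &page_symlink_inode_operations;
	inode_nohighmem(inode);	/* body readable via page_address() */
	inode->i_mapping->a_ops = &examplefs_aops;	/* hypothetical aops */
}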
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 0edc12856147..eff62801acbf 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1608,7 +1608,7 @@ const struct inode_operations ubifs_file_inode_operations = {
const struct inode_operations ubifs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = ubifs_setattr,
.getattr = ubifs_getattr,
.setxattr = ubifs_setxattr,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 8d0b3ade0ff0..055746350d16 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1540,7 +1540,8 @@ reread:
break;
case ICBTAG_FILE_TYPE_SYMLINK:
inode->i_data.a_ops = &udf_symlink_aops;
- inode->i_op = &udf_symlink_inode_operations;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mode = S_IFLNK | S_IRWXUGO;
break;
case ICBTAG_FILE_TYPE_MAIN:
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index c97b5a8d1e24..42eafb91f7ff 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -921,7 +921,8 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
}
inode->i_data.a_ops = &udf_symlink_aops;
- inode->i_op = &udf_symlink_inode_operations;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
struct kernel_lb_addr eloc;
@@ -1344,8 +1345,3 @@ const struct inode_operations udf_dir_inode_operations = {
.rename = udf_rename,
.tmpfile = udf_tmpfile,
};
-const struct inode_operations udf_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
-};
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 862535b3ba58..8d619773056b 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -107,7 +107,7 @@ static int udf_symlink_filler(struct file *file, struct page *page)
struct buffer_head *bh = NULL;
unsigned char *symlink;
int err;
- unsigned char *p = kmap(page);
+ unsigned char *p = page_address(page);
struct udf_inode_info *iinfo;
uint32_t pos;
@@ -141,7 +141,6 @@ static int udf_symlink_filler(struct file *file, struct page *page)
up_read(&iinfo->i_data_sem);
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
@@ -149,7 +148,6 @@ out_unlock_inode:
up_read(&iinfo->i_data_sem);
SetPageError(page);
out_unmap:
- kunmap(page);
unlock_page(page);
return err;
}
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 47bb3f5ca360..ce169b49429d 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -85,7 +85,6 @@ extern const struct inode_operations udf_dir_inode_operations;
extern const struct file_operations udf_dir_operations;
extern const struct inode_operations udf_file_inode_operations;
extern const struct file_operations udf_file_operations;
-extern const struct inode_operations udf_symlink_inode_operations;
extern const struct address_space_operations udf_aops;
extern const struct address_space_operations udf_adinicb_aops;
extern const struct address_space_operations udf_symlink_aops;
diff --git a/fs/ufs/Makefile b/fs/ufs/Makefile
index 392db25c0b56..ec4a6b49fa13 100644
--- a/fs/ufs/Makefile
+++ b/fs/ufs/Makefile
@@ -5,5 +5,5 @@
obj-$(CONFIG_UFS_FS) += ufs.o
ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \
- namei.o super.o symlink.o util.o
+ namei.o super.o util.o
ccflags-$(CONFIG_UFS_DEBUG) += -DDEBUG
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index a064cf44b143..d897e169ab9c 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -528,11 +528,12 @@ static void ufs_set_inode_ops(struct inode *inode)
inode->i_mapping->a_ops = &ufs_aops;
} else if (S_ISLNK(inode->i_mode)) {
if (!inode->i_blocks) {
- inode->i_op = &ufs_fast_symlink_inode_operations;
inode->i_link = (char *)UFS_I(inode)->i_u1.i_symlink;
+ inode->i_op = &simple_symlink_inode_operations;
} else {
- inode->i_op = &ufs_symlink_inode_operations;
inode->i_mapping->a_ops = &ufs_aops;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
}
} else
init_special_inode(inode, inode->i_mode,
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 47966554317c..acf4a3b61b81 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -123,14 +123,15 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) {
/* slow symlink */
- inode->i_op = &ufs_symlink_inode_operations;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &ufs_aops;
err = page_symlink(inode, symname, l);
if (err)
goto out_fail;
} else {
/* fast symlink */
- inode->i_op = &ufs_fast_symlink_inode_operations;
+ inode->i_op = &simple_symlink_inode_operations;
inode->i_link = (char *)UFS_I(inode)->i_u1.i_symlink;
memcpy(inode->i_link, symname, l);
inode->i_size = l-1;
diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c
deleted file mode 100644
index 874480bb43e9..000000000000
--- a/fs/ufs/symlink.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * linux/fs/ufs/symlink.c
- *
- * Only fast symlinks left here - the rest is done by generic code. AV, 1999
- *
- * Copyright (C) 1998
- * Daniel Pirkl <daniel.pirkl@emai.cz>
- * Charles University, Faculty of Mathematics and Physics
- *
- * from
- *
- * linux/fs/ext2/symlink.c
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- * from
- *
- * linux/fs/minix/symlink.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * ext2 symlink handling code
- */
-
-#include "ufs_fs.h"
-#include "ufs.h"
-
-const struct inode_operations ufs_fast_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = simple_follow_link,
- .setattr = ufs_setattr,
-};
-
-const struct inode_operations ufs_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
- .setattr = ufs_setattr,
-};
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 7da4aca868c0..c87f4c3fa9dd 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -136,10 +136,6 @@ extern __printf(3, 4)
void ufs_panic(struct super_block *, const char *, const char *, ...);
void ufs_mark_sb_dirty(struct super_block *sb);
-/* symlink.c */
-extern const struct inode_operations ufs_fast_symlink_inode_operations;
-extern const struct inode_operations ufs_symlink_inode_operations;
-
static inline struct ufs_sb_info *UFS_SB(struct super_block *sb)
{
return sb->s_fs_info;
diff --git a/fs/xattr.c b/fs/xattr.c
index 9b932b95d74e..d7f5037a17b5 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -208,25 +208,6 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value,
return error;
}
-/* Compare an extended attribute value with the given value */
-int vfs_xattr_cmp(struct dentry *dentry, const char *xattr_name,
- const char *value, size_t size, gfp_t flags)
-{
- char *xattr_value = NULL;
- int rc;
-
- rc = vfs_getxattr_alloc(dentry, xattr_name, &xattr_value, 0, flags);
- if (rc < 0)
- return rc;
-
- if ((rc != size) || (memcmp(xattr_value, value, rc) != 0))
- rc = -EINVAL;
- else
- rc = 0;
- kfree(xattr_value);
- return rc;
-}
-
ssize_t
vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
{
@@ -700,13 +681,20 @@ xattr_resolve_name(const struct xattr_handler **handlers, const char **name)
return NULL;
for_each_xattr_handler(handlers, handler) {
- const char *n = strcmp_prefix(*name, handler->prefix);
+ const char *n;
+
+ n = strcmp_prefix(*name, xattr_prefix(handler));
if (n) {
+ if (!handler->prefix ^ !*n) {
+ if (*n)
+ continue;
+ return ERR_PTR(-EINVAL);
+ }
*name = n;
- break;
+ return handler;
}
}
- return handler;
+ return ERR_PTR(-EOPNOTSUPP);
}
/*
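xattr_resolve_name() now reports two distinct failures: a name that matches a handler's prefix but has an empty remainder (e.g. "user.") yields -EINVAL, while a name no handler claims yields -EOPNOTSUPP; the `!handler->prefix ^ !*n` test additionally lets exact-name handlers (handler->name set, no prefix) match only the complete name. A userspace model of the matching, assuming strcmp_prefix() returns the tail on a prefix match and NULL otherwise:

#include <assert.h>

static const char *strcmp_prefix(const char *a, const char *a_prefix)
{
	while (*a_prefix && *a == *a_prefix) {
		a++;
		a_prefix++;
	}
	return *a_prefix ? NULL : a;
}

int main(void)
{
	const char *n;

	n = strcmp_prefix("user.foo", "user.");
	assert(n && *n);	/* handler found, name becomes "foo" */
	n = strcmp_prefix("user.", "user.");
	assert(n && !*n);	/* empty remainder -> ERR_PTR(-EINVAL) */
	n = strcmp_prefix("weird.x", "user.");
	assert(!n);		/* no match -> ERR_PTR(-EOPNOTSUPP) */
	return 0;
}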
@@ -718,8 +706,8 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s
const struct xattr_handler *handler;
handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
- if (!handler)
- return -EOPNOTSUPP;
+ if (IS_ERR(handler))
+ return PTR_ERR(handler);
return handler->get(handler, dentry, name, buffer, size);
}
@@ -735,19 +723,25 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
if (!buffer) {
for_each_xattr_handler(handlers, handler) {
- size += handler->list(handler, dentry, NULL, 0,
- NULL, 0);
+ if (!handler->name ||
+ (handler->list && !handler->list(dentry)))
+ continue;
+ size += strlen(handler->name) + 1;
}
} else {
char *buf = buffer;
+ size_t len;
for_each_xattr_handler(handlers, handler) {
- size = handler->list(handler, dentry, buf, buffer_size,
- NULL, 0);
- if (size > buffer_size)
+ if (!handler->name ||
+ (handler->list && !handler->list(dentry)))
+ continue;
+ len = strlen(handler->name);
+ if (len + 1 > buffer_size)
return -ERANGE;
- buf += size;
- buffer_size -= size;
+ memcpy(buf, handler->name, len + 1);
+ buf += len + 1;
+ buffer_size -= len + 1;
}
size = buf - buffer;
}
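The rewritten generic_listxattr() preserves the long-standing listxattr(2) contract: a NULL buffer returns the byte count needed, otherwise names are packed as consecutive NUL-terminated strings. The userspace view of that contract (runnable, Linux-specific):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : ".";
	ssize_t len = listxattr(path, NULL, 0);		/* size query */

	if (len <= 0)
		return 0;				/* no xattrs or error */
	char *buf = malloc(len);
	if (!buf)
		return 1;
	len = listxattr(path, buf, len);		/* copy pass */
	for (ssize_t off = 0; off < len; off += strlen(buf + off) + 1)
		printf("%s\n", buf + off);		/* one name per entry */
	free(buf);
	return 0;
}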
@@ -765,8 +759,8 @@ generic_setxattr(struct dentry *dentry, const char *name, const void *value, siz
if (size == 0)
value = ""; /* empty EA, do not remove */
handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
- if (!handler)
- return -EOPNOTSUPP;
+ if (IS_ERR(handler))
+ return PTR_ERR(handler);
return handler->set(handler, dentry, name, value, size, flags);
}
@@ -780,8 +774,8 @@ generic_removexattr(struct dentry *dentry, const char *name)
const struct xattr_handler *handler;
handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
- if (!handler)
- return -EOPNOTSUPP;
+ if (IS_ERR(handler))
+ return PTR_ERR(handler);
return handler->set(handler, dentry, name, NULL, 0, XATTR_REPLACE);
}
@@ -808,7 +802,7 @@ EXPORT_SYMBOL(generic_removexattr);
const char *xattr_full_name(const struct xattr_handler *handler,
const char *name)
{
- size_t prefix_len = strlen(handler->prefix);
+ size_t prefix_len = strlen(xattr_prefix(handler));
return name - prefix_len;
}
@@ -863,8 +857,22 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
return ret;
}
-static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
- const void *value, size_t size, int flags)
+/**
+ * simple_xattr_set - xattr SET operation for in-memory/pseudo filesystems
+ * @xattrs: target simple_xattr list
+ * @name: name of the extended attribute
+ * @value: value of the xattr. If %NULL, will remove the attribute.
+ * @size: size of the new xattr
+ * @flags: %XATTR_{CREATE|REPLACE}
+ *
+ * If %XATTR_CREATE is set, the xattr shouldn't exist already; otherwise fails
+ * with -EEXIST. If %XATTR_REPLACE is set, the xattr should exist;
+ * otherwise, fails with -ENODATA.
+ *
+ * Returns 0 on success, -errno on failure.
+ */
+int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
+ const void *value, size_t size, int flags)
{
struct simple_xattr *xattr;
struct simple_xattr *new_xattr = NULL;
@@ -914,73 +922,64 @@ out:
}
-/**
- * simple_xattr_set - xattr SET operation for in-memory/pseudo filesystems
- * @xattrs: target simple_xattr list
- * @name: name of the new extended attribute
- * @value: value of the new xattr. If %NULL, will remove the attribute
- * @size: size of the new xattr
- * @flags: %XATTR_{CREATE|REPLACE}
- *
- * %XATTR_CREATE is set, the xattr shouldn't exist already; otherwise fails
- * with -EEXIST. If %XATTR_REPLACE is set, the xattr should exist;
- * otherwise, fails with -ENODATA.
- *
- * Returns 0 on success, -errno on failure.
- */
-int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
- const void *value, size_t size, int flags)
-{
- if (size == 0)
- value = ""; /* empty EA, do not remove */
- return __simple_xattr_set(xattrs, name, value, size, flags);
-}
-
-/*
- * xattr REMOVE operation for in-memory/pseudo filesystems
- */
-int simple_xattr_remove(struct simple_xattrs *xattrs, const char *name)
+static bool xattr_is_trusted(const char *name)
{
- return __simple_xattr_set(xattrs, name, NULL, 0, XATTR_REPLACE);
+ return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
}
-static bool xattr_is_trusted(const char *name)
+static int xattr_list_one(char **buffer, ssize_t *remaining_size,
+ const char *name)
{
- return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
+ size_t len = strlen(name) + 1;
+ if (*buffer) {
+ if (*remaining_size < len)
+ return -ERANGE;
+ memcpy(*buffer, name, len);
+ *buffer += len;
+ }
+ *remaining_size -= len;
+ return 0;
}
/*
* xattr LIST operation for in-memory/pseudo filesystems
*/
-ssize_t simple_xattr_list(struct simple_xattrs *xattrs, char *buffer,
- size_t size)
+ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
+ char *buffer, size_t size)
{
bool trusted = capable(CAP_SYS_ADMIN);
struct simple_xattr *xattr;
- size_t used = 0;
+ ssize_t remaining_size = size;
+ int err = 0;
+
+#ifdef CONFIG_FS_POSIX_ACL
+ if (inode->i_acl) {
+ err = xattr_list_one(&buffer, &remaining_size,
+ XATTR_NAME_POSIX_ACL_ACCESS);
+ if (err)
+ return err;
+ }
+ if (inode->i_default_acl) {
+ err = xattr_list_one(&buffer, &remaining_size,
+ XATTR_NAME_POSIX_ACL_DEFAULT);
+ if (err)
+ return err;
+ }
+#endif
spin_lock(&xattrs->lock);
list_for_each_entry(xattr, &xattrs->head, list) {
- size_t len;
-
/* skip "trusted." attributes for unprivileged callers */
if (!trusted && xattr_is_trusted(xattr->name))
continue;
- len = strlen(xattr->name) + 1;
- used += len;
- if (buffer) {
- if (size < used) {
- used = -ERANGE;
- break;
- }
- memcpy(buffer, xattr->name, len);
- buffer += len;
- }
+ err = xattr_list_one(&buffer, &remaining_size, xattr->name);
+ if (err)
+ break;
}
spin_unlock(&xattrs->lock);
- return used;
+ return err ? err : size - remaining_size;
}
/*
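simple_xattr_list() now takes the inode so it can synthesize the two POSIX ACL names from inode->i_acl/i_default_acl, much as the xfs change below synthesizes them from its on-disk SGI_ACL_* attributes; and because xattr_list_one() decrements remaining_size whether or not it copies, one loop serves both the size query and the copy pass. A caller reduces to (sketch; EXAMPLEFS_I() is hypothetical):

static ssize_t examplefs_listxattr(struct dentry *dentry, char *buffer,
				   size_t size)
{
	struct inode *inode = d_inode(dentry);

	return simple_xattr_list(inode, &EXAMPLEFS_I(inode)->xattrs,
				 buffer, size);
}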
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 6bb470fbb8e8..2d5df1f23bbc 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -252,29 +252,6 @@ xfs_set_mode(struct inode *inode, umode_t mode)
return error;
}
-static int
-xfs_acl_exists(struct inode *inode, unsigned char *name)
-{
- int len = XFS_ACL_MAX_SIZE(XFS_M(inode->i_sb));
-
- return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
- ATTR_ROOT|ATTR_KERNOVAL) == 0);
-}
-
-int
-posix_acl_access_exists(struct inode *inode)
-{
- return xfs_acl_exists(inode, SGI_ACL_FILE);
-}
-
-int
-posix_acl_default_exists(struct inode *inode)
-{
- if (!S_ISDIR(inode->i_mode))
- return 0;
- return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
-}
-
int
xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 52f8255d6bdf..286fa89217f5 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -24,16 +24,12 @@ struct posix_acl;
#ifdef CONFIG_XFS_POSIX_ACL
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-extern int posix_acl_access_exists(struct inode *inode);
-extern int posix_acl_default_exists(struct inode *inode);
#else
static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)
{
return NULL;
}
# define xfs_set_acl NULL
-# define posix_acl_access_exists(inode) 0
-# define posix_acl_default_exists(inode) 0
#endif /* CONFIG_XFS_POSIX_ACL */
extern void xfs_forget_acl(struct inode *inode, const char *name, int xflags);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 245268a0cdf0..06eafafe636e 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -414,13 +414,17 @@ xfs_vn_rename(
* uio is kmalloced for this reason...
*/
STATIC const char *
-xfs_vn_follow_link(
+xfs_vn_get_link(
struct dentry *dentry,
- void **cookie)
+ struct inode *inode,
+ struct delayed_call *done)
{
char *link;
int error = -ENOMEM;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
if (!link)
goto out_err;
@@ -429,7 +433,8 @@ xfs_vn_follow_link(
if (unlikely(error))
goto out_kfree;
- return *cookie = link;
+ set_delayed_call(done, kfree_link, link);
+ return link;
out_kfree:
kfree(link);
@@ -1172,8 +1177,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
static const struct inode_operations xfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = xfs_vn_follow_link,
- .put_link = kfree_put_link,
+ .get_link = xfs_vn_get_link,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 839b35ca21c6..110f1d7d86b0 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -39,9 +39,6 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *dentry,
struct xfs_inode *ip = XFS_I(d_inode(dentry));
int error, asize = size;
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
/* Convert Linux syscall to XFS internal ATTR flags */
if (!size) {
xflags |= ATTR_KERNOVAL;
@@ -84,9 +81,6 @@ xfs_xattr_set(const struct xattr_handler *handler, struct dentry *dentry,
struct xfs_inode *ip = XFS_I(d_inode(dentry));
int error;
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
/* Convert Linux syscall to XFS internal ATTR flags */
if (flags & XATTR_CREATE)
xflags |= ATTR_CREATE;
@@ -135,47 +129,19 @@ const struct xattr_handler *xfs_xattr_handlers[] = {
NULL
};
-static unsigned int xfs_xattr_prefix_len(int flags)
-{
- if (flags & XFS_ATTR_SECURE)
- return sizeof("security");
- else if (flags & XFS_ATTR_ROOT)
- return sizeof("trusted");
- else
- return sizeof("user");
-}
-
-static const char *xfs_xattr_prefix(int flags)
-{
- if (flags & XFS_ATTR_SECURE)
- return xfs_xattr_security_handler.prefix;
- else if (flags & XFS_ATTR_ROOT)
- return xfs_xattr_trusted_handler.prefix;
- else
- return xfs_xattr_user_handler.prefix;
-}
-
static int
-xfs_xattr_put_listent(
+__xfs_xattr_put_listent(
struct xfs_attr_list_context *context,
- int flags,
- unsigned char *name,
- int namelen,
- int valuelen,
- unsigned char *value)
+ char *prefix,
+ int prefix_len,
+ unsigned char *name,
+ int namelen)
{
- unsigned int prefix_len = xfs_xattr_prefix_len(flags);
char *offset;
int arraytop;
- ASSERT(context->count >= 0);
-
- /*
- * Only show root namespace entries if we are actually allowed to
- * see them.
- */
- if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
- return 0;
+ if (!context->alist)
+ goto compute_size;
arraytop = context->count + prefix_len + namelen + 1;
if (arraytop > context->firstu) {
@@ -183,17 +149,19 @@ xfs_xattr_put_listent(
return 1;
}
offset = (char *)context->alist + context->count;
- strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
+ strncpy(offset, prefix, prefix_len);
offset += prefix_len;
strncpy(offset, (char *)name, namelen); /* real name */
offset += namelen;
*offset = '\0';
+
+compute_size:
context->count += prefix_len + namelen + 1;
return 0;
}
static int
-xfs_xattr_put_listent_sizes(
+xfs_xattr_put_listent(
struct xfs_attr_list_context *context,
int flags,
unsigned char *name,
@@ -201,24 +169,55 @@ xfs_xattr_put_listent_sizes(
int valuelen,
unsigned char *value)
{
- context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
- return 0;
-}
+ char *prefix;
+ int prefix_len;
-static int
-list_one_attr(const char *name, const size_t len, void *data,
- size_t size, ssize_t *result)
-{
- char *p = data + *result;
+ ASSERT(context->count >= 0);
- *result += len;
- if (!size)
- return 0;
- if (*result > size)
- return -ERANGE;
+ if (flags & XFS_ATTR_ROOT) {
+#ifdef CONFIG_XFS_POSIX_ACL
+ if (namelen == SGI_ACL_FILE_SIZE &&
+ strncmp(name, SGI_ACL_FILE,
+ SGI_ACL_FILE_SIZE) == 0) {
+ int ret = __xfs_xattr_put_listent(
+ context, XATTR_SYSTEM_PREFIX,
+ XATTR_SYSTEM_PREFIX_LEN,
+ XATTR_POSIX_ACL_ACCESS,
+ strlen(XATTR_POSIX_ACL_ACCESS));
+ if (ret)
+ return ret;
+ } else if (namelen == SGI_ACL_DEFAULT_SIZE &&
+ strncmp(name, SGI_ACL_DEFAULT,
+ SGI_ACL_DEFAULT_SIZE) == 0) {
+ int ret = __xfs_xattr_put_listent(
+ context, XATTR_SYSTEM_PREFIX,
+ XATTR_SYSTEM_PREFIX_LEN,
+ XATTR_POSIX_ACL_DEFAULT,
+ strlen(XATTR_POSIX_ACL_DEFAULT));
+ if (ret)
+ return ret;
+ }
+#endif
- strcpy(p, name);
- return 0;
+ /*
+ * Only show root namespace entries if we are actually allowed to
+ * see them.
+ */
+ if (!capable(CAP_SYS_ADMIN))
+ return 0;
+
+ prefix = XATTR_TRUSTED_PREFIX;
+ prefix_len = XATTR_TRUSTED_PREFIX_LEN;
+ } else if (flags & XFS_ATTR_SECURE) {
+ prefix = XATTR_SECURITY_PREFIX;
+ prefix_len = XATTR_SECURITY_PREFIX_LEN;
+ } else {
+ prefix = XATTR_USER_PREFIX;
+ prefix_len = XATTR_USER_PREFIX_LEN;
+ }
+
+ return __xfs_xattr_put_listent(context, prefix, prefix_len, name,
+ namelen);
}
ssize_t
@@ -227,7 +226,6 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
struct xfs_attr_list_context context;
struct attrlist_cursor_kern cursor = { 0 };
struct inode *inode = d_inode(dentry);
- int error;
/*
* First read the regular on-disk attributes.
@@ -236,37 +234,14 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
context.dp = XFS_I(inode);
context.cursor = &cursor;
context.resynch = 1;
- context.alist = data;
+ context.alist = size ? data : NULL;
context.bufsize = size;
context.firstu = context.bufsize;
-
- if (size)
- context.put_listent = xfs_xattr_put_listent;
- else
- context.put_listent = xfs_xattr_put_listent_sizes;
+ context.put_listent = xfs_xattr_put_listent;
xfs_attr_list_int(&context);
if (context.count < 0)
return -ERANGE;
- /*
- * Then add the two synthetic ACL attributes.
- */
- if (posix_acl_access_exists(inode)) {
- error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
- strlen(POSIX_ACL_XATTR_ACCESS) + 1,
- data, size, &context.count);
- if (error)
- return error;
- }
-
- if (posix_acl_default_exists(inode)) {
- error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
- strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
- data, size, &context.count);
- if (error)
- return error;
- }
-
return context.count;
}
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index b42afada1280..0f45f93ef692 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -93,7 +93,7 @@
#endif /* CONFIG_SMP */
#ifndef smp_store_mb
-#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#endif
#ifndef smp_mb__before_atomic
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 14b0ff32fb9f..3a6803cb0ec9 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -569,7 +569,7 @@ static inline int track_pfn_copy(struct vm_area_struct *vma)
}
/*
- * untrack_pfn_vma is called while unmapping a pfnmap for a region.
+ * untrack_pfn is called while unmapping a pfnmap for a region.
* untrack can be called for a specific region indicated by pfn and size or
* can be for the entire vma (in which case pfn, size are zero).
*/
@@ -577,6 +577,13 @@ static inline void untrack_pfn(struct vm_area_struct *vma,
unsigned long pfn, unsigned long size)
{
}
+
+/*
+ * untrack_pfn_moved is called while mremapping a pfnmap for a new region.
+ */
+static inline void untrack_pfn_moved(struct vm_area_struct *vma)
+{
+}
#else
extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long addr,
@@ -586,6 +593,7 @@ extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size);
+extern void untrack_pfn_moved(struct vm_area_struct *vma);
#endif
#ifdef __HAVE_COLOR_ZERO_PAGE
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index e2aadbc7151f..39e1cb201b8e 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -12,8 +12,9 @@
* GNU General Public License for more details.
*
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
+ * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
*
- * Authors: Waiman Long <waiman.long@hp.com>
+ * Authors: Waiman Long <waiman.long@hpe.com>
*/
#ifndef __ASM_GENERIC_QSPINLOCK_H
#define __ASM_GENERIC_QSPINLOCK_H
@@ -62,7 +63,7 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
static __always_inline int queued_spin_trylock(struct qspinlock *lock)
{
if (!atomic_read(&lock->val) &&
- (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) == 0))
+ (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
return 1;
return 0;
}
@@ -77,7 +78,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
{
u32 val;
- val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL);
+ val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
if (likely(val == 0))
return;
queued_spin_lock_slowpath(lock, val);
@@ -93,7 +94,7 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
/*
* smp_mb__before_atomic() in order to guarantee release semantics
*/
- smp_mb__before_atomic_dec();
+ smp_mb__before_atomic();
atomic_sub(_Q_LOCKED_VAL, &lock->val);
}
#endif
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index db284bff29dc..9dbb739cafa0 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -5,7 +5,7 @@
* Copyright 2001 Red Hat, Inc.
* Based on code from mm/memory.c Copyright Linus Torvalds and others.
*
- * Copyright 2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright 2011 Red Hat, Inc., Peter Zijlstra
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 0b921ae06cd8..0a271ca1f7c7 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -309,6 +309,11 @@ struct drm_file {
unsigned universal_planes:1;
/* true if client understands atomic properties */
unsigned atomic:1;
+ /*
+ * This client is allowed to gain master privileges for @master.
+ * Protected by struct drm_device::master_mutex.
+ */
+ unsigned allowed_master:1;
struct pid *pid;
kuid_t uid;
@@ -910,6 +915,7 @@ extern int drm_open(struct inode *inode, struct file *filp);
extern ssize_t drm_read(struct file *filp, char __user *buffer,
size_t count, loff_t *offset);
extern int drm_release(struct inode *inode, struct file *filp);
+extern int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv);
/* Mapping support (drm_vm.h) */
extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait);
@@ -947,6 +953,10 @@ extern void drm_send_vblank_event(struct drm_device *dev, unsigned int pipe,
struct drm_pending_vblank_event *e);
extern void drm_crtc_send_vblank_event(struct drm_crtc *crtc,
struct drm_pending_vblank_event *e);
+extern void drm_arm_vblank_event(struct drm_device *dev, unsigned int pipe,
+ struct drm_pending_vblank_event *e);
+extern void drm_crtc_arm_vblank_event(struct drm_crtc *crtc,
+ struct drm_pending_vblank_event *e);
extern bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe);
extern bool drm_crtc_handle_vblank(struct drm_crtc *crtc);
extern int drm_vblank_get(struct drm_device *dev, unsigned int pipe);
diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index e67aeac2aee0..4b74c97d297a 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -136,6 +136,9 @@ drm_atomic_connectors_for_crtc(struct drm_atomic_state *state,
void drm_atomic_legacy_backoff(struct drm_atomic_state *state);
+void
+drm_atomic_clean_old_fb(struct drm_device *dev, unsigned plane_mask, int ret);
+
int __must_check drm_atomic_check_only(struct drm_atomic_state *state);
int __must_check drm_atomic_commit(struct drm_atomic_state *state);
int __must_check drm_atomic_async_commit(struct drm_atomic_state *state);
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 9c747cb14ad8..d2f41477f8ae 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -342,10 +342,10 @@ int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
struct irq_phys_map *map, bool level);
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
-int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
int virt_irq, int irq);
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 054833939995..1991aea2ec4c 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -870,8 +870,8 @@ static inline int acpi_dev_get_property(struct acpi_device *adev,
}
static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode,
- const char *name, const char *cells_name,
- size_t index, struct acpi_reference_args *args)
+ const char *name, size_t index,
+ struct acpi_reference_args *args)
{
return -ENXIO;
}
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 2b8ed123ad36..defeaac0745f 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -107,7 +107,7 @@ static inline __u64 ror64(__u64 word, unsigned int shift)
*/
static inline __u32 rol32(__u32 word, unsigned int shift)
{
- return (word << shift) | (word >> (32 - shift));
+ return (word << shift) | (word >> ((-shift) & 31));
}
/**
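The rol32() change removes undefined behaviour at shift == 0: the old `word >> (32 - shift)` becomes a shift by the full type width, which C does not define, whereas `(-shift) & 31` equals `32 - shift` for shifts 1..31 and collapses to 0 when shift is 0. The same trick, checked in userspace:

#include <assert.h>
#include <stdint.h>

static uint32_t rol32(uint32_t word, unsigned int shift)
{
	/* shift count stays in [0, 31]; shift == 0 is now well defined */
	return (word << shift) | (word >> ((-shift) & 31));
}

int main(void)
{
	assert(rol32(0x80000001u, 1) == 0x00000003u);
	assert(rol32(0xdeadbeefu, 0) == 0xdeadbeefu);	/* was UB before */
	return 0;
}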
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3fe27f8d91f0..c70e3588a48c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -254,6 +254,7 @@ struct queue_limits {
unsigned long virt_boundary_mask;
unsigned int max_hw_sectors;
+ unsigned int max_dev_sectors;
unsigned int chunk_sectors;
unsigned int max_sectors;
unsigned int max_segment_size;
@@ -773,7 +774,6 @@ extern void blk_rq_set_block_pc(struct request *);
extern void blk_requeue_request(struct request_queue *, struct request *);
extern void blk_add_request_payload(struct request *rq, struct page *page,
unsigned int len);
-extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
extern int blk_lld_busy(struct request_queue *q);
extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
struct bio_set *bs, gfp_t gfp_mask,
@@ -794,7 +794,10 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
struct scsi_ioctl_command __user *);
+extern int blk_queue_enter(struct request_queue *q, gfp_t gfp);
+extern void blk_queue_exit(struct request_queue *q);
extern void blk_start_queue(struct request_queue *q);
+extern void blk_start_queue_async(struct request_queue *q);
extern void blk_stop_queue(struct request_queue *q);
extern void blk_sync_queue(struct request_queue *q);
extern void __blk_stop_queue(struct request_queue *q);
@@ -958,7 +961,6 @@ extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
extern void blk_cleanup_queue(struct request_queue *);
extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
extern void blk_queue_bounce_limit(struct request_queue *, u64);
-extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
extern void blk_queue_max_segments(struct request_queue *, unsigned short);
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index f589222bfa87..35b22f94d2d2 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -19,6 +19,10 @@ extern unsigned long min_low_pfn;
* highest page
*/
extern unsigned long max_pfn;
+/*
+ * highest possible page
+ */
+extern unsigned long long max_possible_pfn;
#ifndef CONFIG_NO_BOOTMEM
/*
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index de464e6683b6..83d1926c61e4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -40,6 +40,7 @@ struct bpf_map {
struct user_struct *user;
const struct bpf_map_ops *ops;
struct work_struct work;
+ atomic_t usercnt;
};
struct bpf_map_type_list {
@@ -167,8 +168,10 @@ struct bpf_prog *bpf_prog_get(u32 ufd);
void bpf_prog_put(struct bpf_prog *prog);
void bpf_prog_put_rcu(struct bpf_prog *prog);
-struct bpf_map *bpf_map_get(u32 ufd);
+struct bpf_map *bpf_map_get_with_uref(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
+void bpf_map_inc(struct bpf_map *map, bool uref);
+void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
extern int sysctl_unprivileged_bpf_disabled;
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 60d44b26276d..06b77f9dd3f2 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -90,7 +90,6 @@ enum {
*/
struct cgroup_file {
/* do not access any fields from outside cgroup core */
- struct list_head node; /* anchored at css->files */
struct kernfs_node *kn;
};
@@ -134,9 +133,6 @@ struct cgroup_subsys_state {
*/
u64 serial_nr;
- /* all cgroup_files associated with this css */
- struct list_head files;
-
/* percpu_ref killing and RCU release */
struct rcu_head rcu_head;
struct work_struct destroy_work;
@@ -426,12 +422,9 @@ struct cgroup_subsys {
void (*css_reset)(struct cgroup_subsys_state *css);
void (*css_e_css_changed)(struct cgroup_subsys_state *css);
- int (*can_attach)(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset);
- void (*cancel_attach)(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset);
- void (*attach)(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset);
+ int (*can_attach)(struct cgroup_taskset *tset);
+ void (*cancel_attach)(struct cgroup_taskset *tset);
+ void (*attach)(struct cgroup_taskset *tset);
int (*can_fork)(struct task_struct *task, void **priv_p);
void (*cancel_fork)(struct task_struct *task, void *priv);
void (*fork)(struct task_struct *task, void *priv);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 22e3754f89c5..cb91b44f5f78 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -88,6 +88,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);
+void cgroup_file_notify(struct cgroup_file *cfile);
char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
@@ -119,8 +120,10 @@ struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state
struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos,
struct cgroup_subsys_state *css);
-struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
-struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
+ struct cgroup_subsys_state **dst_cssp);
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
+ struct cgroup_subsys_state **dst_cssp);
void css_task_iter_start(struct cgroup_subsys_state *css,
struct css_task_iter *it);
@@ -235,30 +238,39 @@ void css_task_iter_end(struct css_task_iter *it);
/**
* cgroup_taskset_for_each - iterate cgroup_taskset
* @task: the loop cursor
+ * @dst_css: the destination css
* @tset: taskset to iterate
*
* @tset may contain multiple tasks and they may belong to multiple
- * processes. When there are multiple tasks in @tset, if a task of a
- * process is in @tset, all tasks of the process are in @tset. Also, all
- * are guaranteed to share the same source and destination csses.
+ * processes.
+ *
+ * On the v2 hierarchy, there may be tasks from multiple processes and they
+ * may not share the source or destination csses.
+ *
+ * On traditional hierarchies, when there are multiple tasks in @tset, if a
+ * task of a process is in @tset, all tasks of the process are in @tset.
+ * Also, all are guaranteed to share the same source and destination csses.
*
* Iteration is not in any specific order.
*/
-#define cgroup_taskset_for_each(task, tset) \
- for ((task) = cgroup_taskset_first((tset)); (task); \
- (task) = cgroup_taskset_next((tset)))
+#define cgroup_taskset_for_each(task, dst_css, tset) \
+ for ((task) = cgroup_taskset_first((tset), &(dst_css)); \
+ (task); \
+ (task) = cgroup_taskset_next((tset), &(dst_css)))
/**
* cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
* @leader: the loop cursor
+ * @dst_css: the destination css
* @tset: takset to iterate
*
* Iterate threadgroup leaders of @tset. For single-task migrations, @tset
* may not contain any.
*/
-#define cgroup_taskset_for_each_leader(leader, tset) \
- for ((leader) = cgroup_taskset_first((tset)); (leader); \
- (leader) = cgroup_taskset_next((tset))) \
+#define cgroup_taskset_for_each_leader(leader, dst_css, tset) \
+ for ((leader) = cgroup_taskset_first((tset), &(dst_css)); \
+ (leader); \
+ (leader) = cgroup_taskset_next((tset), &(dst_css))) \
if ((leader) != (leader)->group_leader) \
; \
else
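Together with the cgroup_subsys callback changes above, the iterators now hand each task its own destination css, which is what makes migrations spanning multiple processes and csses on the v2 hierarchy expressible. A controller's ->attach() under the new signature looks like (sketch; the example_* helpers are hypothetical):

static void example_ss_attach(struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct cgroup_subsys_state *dst_css;

	cgroup_taskset_for_each(task, dst_css, tset)
		example_account_task(task, dst_css);	/* per-task dst css */
}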
@@ -516,19 +528,6 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
pr_cont_kernfs_path(cgrp->kn);
}
-/**
- * cgroup_file_notify - generate a file modified event for a cgroup_file
- * @cfile: target cgroup_file
- *
- * @cfile must have been obtained by setting cftype->file_offset.
- */
-static inline void cgroup_file_notify(struct cgroup_file *cfile)
-{
- /* might not have been created due to one of the CFTYPE selector flags */
- if (cfile->kn)
- kernfs_notify(cfile->kn);
-}
-
#else /* !CONFIG_CGROUPS */
struct cgroup_subsys_state;
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 7784b597e959..6013021a3b39 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -62,12 +62,18 @@ struct module;
* @suspend: suspend function for the clocksource, if necessary
* @resume: resume function for the clocksource, if necessary
* @owner: module reference, must be set by clocksource in modules
+ *
+ * Note: This struct is not used in hot paths of the timekeeping code
+ * because the timekeeper caches the hot path fields in its own data
+ * structure, so no cache line alignment is required.
+ *
+ * The pointer to the clocksource itself is handed to the read
+ * callback. If you need extra information there you can wrap struct
+ * clocksource into your own struct. Depending on the amount of
+ * information you need, you should consider cache-line-aligning that
+ * structure.
*/
struct clocksource {
- /*
- * Hotpath data, fits in a single cache line when the
- * clocksource itself is cacheline aligned.
- */
cycle_t (*read)(struct clocksource *cs);
cycle_t mask;
u32 mult;
@@ -95,7 +101,7 @@ struct clocksource {
cycle_t wd_last;
#endif
struct module *owner;
-} ____cacheline_aligned;
+};
/*
* Clock source flags bits::
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 4dac1036594f..00b042c49ccd 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -299,6 +299,23 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
__u.__val; \
})
+/**
+ * smp_cond_acquire() - Spin wait for cond with ACQUIRE ordering
+ * @cond: boolean expression to wait for
+ *
+ * Equivalent to using smp_load_acquire() on the condition variable but employs
+ * the control dependency of the wait to reduce the barrier on many platforms.
+ *
+ * The control dependency provides a LOAD->STORE order, the additional RMB
+ * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order,
+ * aka. ACQUIRE.
+ */
+#define smp_cond_acquire(cond) do { \
+ while (!(cond)) \
+ cpu_relax(); \
+ smp_rmb(); /* ctrl + rmb := acquire */ \
+} while (0)
+
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
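smp_cond_acquire() is meant to pair with a release-style store on the producer side; a minimal sketch of the intended usage (the example_data/example_ready variables are illustrative):

static int example_data, example_ready;

static void example_producer(void)
{
	WRITE_ONCE(example_data, 42);
	smp_store_release(&example_ready, 1);	/* publish data, then flag */
}

static void example_consumer(void)
{
	/* control dependency + smp_rmb() order this like an ACQUIRE:
	 * the data load below cannot move before the flag is seen set */
	smp_cond_acquire(READ_ONCE(example_ready));
	BUG_ON(READ_ONCE(example_data) != 42);
}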
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index a8a335b7fce0..758a029011b1 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -197,6 +197,16 @@ static inline struct configfs_subsystem *to_configfs_subsystem(struct config_gro
int configfs_register_subsystem(struct configfs_subsystem *subsys);
void configfs_unregister_subsystem(struct configfs_subsystem *subsys);
+int configfs_register_group(struct config_group *parent_group,
+ struct config_group *group);
+void configfs_unregister_group(struct config_group *group);
+
+struct config_group *
+configfs_register_default_group(struct config_group *parent_group,
+ const char *name,
+ struct config_item_type *item_type);
+void configfs_unregister_default_group(struct config_group *group);
+
/* These functions can sleep and can alloc with GFP_KERNEL */
/* WARNING: These cannot be called underneath configfs callbacks!! */
int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target);
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 68b575afe5f5..d259274238db 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -86,7 +86,7 @@ static inline void context_tracking_init(void) { }
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static inline void guest_enter(void)
{
- if (vtime_accounting_enabled())
+ if (vtime_accounting_cpu_enabled())
vtime_guest_enter(current);
else
current->flags |= PF_VCPU;
@@ -100,7 +100,7 @@ static inline void guest_exit(void)
if (context_tracking_is_enabled())
__context_tracking_exit(CONTEXT_GUEST);
- if (vtime_accounting_enabled())
+ if (vtime_accounting_cpu_enabled())
vtime_guest_exit(current);
else
current->flags &= ~PF_VCPU;
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
index ee956c528fab..1d34fe68f48a 100644
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -22,12 +22,12 @@ struct context_tracking {
};
#ifdef CONFIG_CONTEXT_TRACKING
-extern struct static_key context_tracking_enabled;
+extern struct static_key_false context_tracking_enabled;
DECLARE_PER_CPU(struct context_tracking, context_tracking);
static inline bool context_tracking_is_enabled(void)
{
- return static_key_false(&context_tracking_enabled);
+ return static_branch_unlikely(&context_tracking_enabled);
}
static inline bool context_tracking_cpu_is_enabled(void)
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index ef4c5b1a860f..177c7680c1a8 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -77,6 +77,7 @@ struct cpufreq_policy {
unsigned int suspend_freq; /* freq to set during suspend */
unsigned int policy; /* see above */
+ unsigned int last_policy; /* policy before unplug */
struct cpufreq_governor *governor; /* see below */
void *governor_data;
bool governor_enabled; /* governor start/stop flag */
diff --git a/include/linux/delayed_call.h b/include/linux/delayed_call.h
new file mode 100644
index 000000000000..f7fa76ae1a9b
--- /dev/null
+++ b/include/linux/delayed_call.h
@@ -0,0 +1,34 @@
+#ifndef _DELAYED_CALL_H
+#define _DELAYED_CALL_H
+
+/*
+ * Poor man's closures; I wish we could've done them sanely polymorphic,
+ * but...
+ */
+
+struct delayed_call {
+ void (*fn)(void *);
+ void *arg;
+};
+
+#define DEFINE_DELAYED_CALL(name) struct delayed_call name = {NULL, NULL}
+
+/* I really wish we had closures with sane typechecking... */
+static inline void set_delayed_call(struct delayed_call *call,
+ void (*fn)(void *), void *arg)
+{
+ call->fn = fn;
+ call->arg = arg;
+}
+
+static inline void do_delayed_call(struct delayed_call *call)
+{
+ if (call->fn)
+ call->fn(call->arg);
+}
+
+static inline void clear_delayed_call(struct delayed_call *call)
+{
+ call->fn = NULL;
+}
+#endif
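The caller side of a delayed_call is what replaces the old ->put_link() step: a ->get_link() instance registers its cleanup (as xfs_vn_get_link() above does with kfree_link()), and the consumer runs it once the returned string is no longer needed. Sketch of a hypothetical in-kernel consumer:

static int example_print_link(struct dentry *dentry, struct inode *inode)
{
	DEFINE_DELAYED_CALL(done);
	const char *link = inode->i_op->get_link(dentry, inode, &done);

	if (IS_ERR(link))
		return PTR_ERR(link);
	pr_info("target: %s\n", link);
	do_delayed_call(&done);	/* runs kfree_link() etc., if registered */
	return 0;
}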
diff --git a/include/linux/dns_resolver.h b/include/linux/dns_resolver.h
index cc92268af89a..6ac3cad9aef1 100644
--- a/include/linux/dns_resolver.h
+++ b/include/linux/dns_resolver.h
@@ -27,7 +27,7 @@
#ifdef __KERNEL__
extern int dns_query(const char *type, const char *name, size_t namelen,
- const char *options, char **_result, time_t *_expiry);
+ const char *options, char **_result, time64_t *_expiry);
#endif /* KERNEL */
diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h
index 7be22da321f3..a4cf57cd0f75 100644
--- a/include/linux/enclosure.h
+++ b/include/linux/enclosure.h
@@ -29,7 +29,11 @@
/* A few generic types ... taken from ses-2 */
enum enclosure_component_type {
ENCLOSURE_COMPONENT_DEVICE = 0x01,
+ ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS = 0x07,
+ ENCLOSURE_COMPONENT_SCSI_TARGET_PORT = 0x14,
+ ENCLOSURE_COMPONENT_SCSI_INITIATOR_PORT = 0x15,
ENCLOSURE_COMPONENT_ARRAY_DEVICE = 0x17,
+ ENCLOSURE_COMPONENT_SAS_EXPANDER = 0x18,
};
/* ses-2 common element status */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4165e9ac9e36..5972ffe5719a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -493,6 +493,25 @@ static inline void bpf_jit_free(struct bpf_prog *fp)
#define BPF_ANC BIT(15)
+static inline bool bpf_needs_clear_a(const struct sock_filter *first)
+{
+ switch (first->code) {
+ case BPF_RET | BPF_K:
+ case BPF_LD | BPF_W | BPF_LEN:
+ return false;
+
+ case BPF_LD | BPF_W | BPF_ABS:
+ case BPF_LD | BPF_H | BPF_ABS:
+ case BPF_LD | BPF_B | BPF_ABS:
+ if (first->k == SKF_AD_OFF + SKF_AD_ALU_XOR_X)
+ return true;
+ return false;
+
+ default:
+ return true;
+ }
+}
+
static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
{
BUG_ON(ftest->code & BPF_ANC);
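bpf_needs_clear_a() centralizes a question every classic-BPF JIT answers in its prologue: must the A register be zeroed before the program's first instruction may read it? Consumption is roughly (sketch; struct example_jit_ctx and the emitter are hypothetical):

static void example_jit_prologue(struct example_jit_ctx *ctx,
				 const struct sock_filter *first)
{
	if (bpf_needs_clear_a(first))
		example_emit_clear_a(ctx);	/* emit "A = 0" */
}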
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3aa514254161..ef3cd36689f6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -31,6 +31,7 @@
#include <linux/blk_types.h>
#include <linux/workqueue.h>
#include <linux/percpu-rwsem.h>
+#include <linux/delayed_call.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -1633,12 +1634,11 @@ struct file_operations {
struct inode_operations {
struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
- const char * (*follow_link) (struct dentry *, void **);
+ const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
int (*permission) (struct inode *, int);
struct posix_acl * (*get_acl)(struct inode *, int);
int (*readlink) (struct dentry *, char __user *,int);
- void (*put_link) (struct inode *, void *);
int (*create) (struct inode *,struct dentry *, umode_t, bool);
int (*link) (struct dentry *,struct inode *,struct dentry *);
@@ -2736,14 +2736,14 @@ extern const struct file_operations generic_ro_fops;
extern int readlink_copy(char __user *, int, const char *);
extern int page_readlink(struct dentry *, char __user *, int);
-extern const char *page_follow_link_light(struct dentry *, void **);
-extern void page_put_link(struct inode *, void *);
+extern const char *page_get_link(struct dentry *, struct inode *,
+ struct delayed_call *);
+extern void page_put_link(void *);
extern int __page_symlink(struct inode *inode, const char *symname, int len,
int nofs);
extern int page_symlink(struct inode *inode, const char *symname, int len);
extern const struct inode_operations page_symlink_inode_operations;
-extern void kfree_put_link(struct inode *, void *);
-extern void free_page_put_link(struct inode *, void *);
+extern void kfree_link(void *);
extern int generic_readlink(struct dentry *, char __user *, int);
extern void generic_fillattr(struct inode *, struct kstat *);
int vfs_getattr_nosec(struct path *path, struct kstat *stat);
@@ -2754,7 +2754,8 @@ void __inode_sub_bytes(struct inode *inode, loff_t bytes);
void inode_sub_bytes(struct inode *inode, loff_t bytes);
loff_t inode_get_bytes(struct inode *inode);
void inode_set_bytes(struct inode *inode, loff_t bytes);
-const char *simple_follow_link(struct dentry *, void **);
+const char *simple_get_link(struct dentry *, struct inode *,
+ struct delayed_call *);
extern const struct inode_operations simple_symlink_inode_operations;
extern int iterate_dir(struct file *, struct dir_context *);
@@ -2764,8 +2765,6 @@ extern int vfs_lstat(const char __user *, struct kstat *);
extern int vfs_fstat(unsigned int, struct kstat *);
extern int vfs_fstatat(int , const char __user *, struct kstat *, int);
-extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
- unsigned long arg);
extern int __generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
loff_t start, loff_t len,
@@ -3025,5 +3024,6 @@ static inline bool dir_relax(struct inode *inode)
}
extern bool path_noexec(const struct path *path);
+extern void inode_nohighmem(struct inode *inode);
#endif /* _LINUX_FS_H */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index eae6548efbf0..60048c50404e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -586,6 +586,7 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);
extern int skip_trace(unsigned long ip);
extern void ftrace_module_init(struct module *mod);
+extern void ftrace_release_mod(struct module *mod);
extern void ftrace_disable_daemon(void);
extern void ftrace_enable_daemon(void);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 6523109e136d..8942af0813e3 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -271,7 +271,7 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
{
- return gfp_flags & __GFP_DIRECT_RECLAIM;
+ return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM);
}
#ifdef CONFIG_HIGHMEM
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 1c1ff7e4faa4..f2cb8d45513d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -150,7 +150,7 @@ extern struct task_group root_task_group;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
# define INIT_VTIME(tsk) \
- .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \
+ .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \
.vtime_snap = 0, \
.vtime_snap_whence = VTIME_SYS,
#else
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index ad16809c8596..cb30edbfe9fc 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -195,6 +195,7 @@ extern void disable_irq(unsigned int irq);
extern void disable_percpu_irq(unsigned int irq);
extern void enable_irq(unsigned int irq);
extern void enable_percpu_irq(unsigned int irq, unsigned int type);
+extern bool irq_percpu_is_enabled(unsigned int irq);
extern void irq_wake_thread(unsigned int irq, void *dev_id);
/* The following three functions are for the core kernel use only. */
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 0ef2a97ccdb5..402753bccafa 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -227,7 +227,7 @@ struct ipv6_pinfo {
struct ipv6_ac_socklist *ipv6_ac_list;
struct ipv6_fl_socklist __rcu *ipv6_fl_list;
- struct ipv6_txoptions *opt;
+ struct ipv6_txoptions __rcu *opt;
struct sk_buff *pktoptions;
struct sk_buff *rxpmtu;
struct inet6_cork cork;
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index c9ae0c6ec050..d5d798b35c1f 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -330,6 +330,7 @@ struct rdists {
};
struct irq_domain;
+struct device_node;
int its_cpu_init(void);
int its_init(struct device_node *node, struct rdists *rdists,
struct irq_domain *domain);
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index bae69e5d693c..9c940263ca23 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -103,10 +103,21 @@ struct device_node;
void gic_cascade_irq(unsigned int gic_nr, unsigned int irq);
int gic_cpu_if_down(unsigned int gic_nr);
+/*
+ * Subdrivers that need some preparatory work can initialize their
+ * chips and call this to register their GICs.
+ */
+int gic_of_init(struct device_node *node, struct device_node *parent);
+
+/*
+ * Legacy platforms not converted to DT yet must use this to init
+ * their GIC
+ */
void gic_init(unsigned int nr, int start,
void __iomem *dist , void __iomem *cpu);
-int gicv2m_of_init(struct device_node *node, struct irq_domain *parent);
+int gicv2m_init(struct fwnode_handle *parent_handle,
+ struct irq_domain *parent);
void gic_send_sgi(unsigned int cpu_id, unsigned int irq);
int gic_get_cpu_id(unsigned int cpu);
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index a587a33363c7..dcca77c4b9d2 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -1,6 +1,8 @@
#ifndef _LINUX_IRQDESC_H
#define _LINUX_IRQDESC_H
+#include <linux/rcupdate.h>
+
/*
* Core internal functions to deal with irq descriptors
*/
@@ -40,6 +42,7 @@ struct pt_regs;
* IRQF_NO_SUSPEND set
* @force_resume_depth: number of irqactions on a irq descriptor with
* IRQF_FORCE_RESUME set
+ * @rcu: rcu head for delayed free
* @dir: /proc/irq/ procfs entry
* @name: flow handler name for /proc/interrupts output
*/
@@ -82,6 +85,9 @@ struct irq_desc {
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *dir;
#endif
+#ifdef CONFIG_SPARSE_IRQ
+ struct rcu_head rcu;
+#endif
int parent_irq;
struct module *owner;
const char *name;
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index d5e5c5bef28c..f64622ad02c1 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -211,6 +211,11 @@ static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node)
return node ? &node->fwnode : NULL;
}
+static inline bool is_fwnode_irqchip(struct fwnode_handle *fwnode)
+{
+ return fwnode && fwnode->type == FWNODE_IRQCHIP;
+}
+
static inline struct irq_domain *irq_find_matching_host(struct device_node *node,
enum irq_domain_bus_token bus_token)
{
@@ -367,6 +372,9 @@ static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false);
}
+extern int irq_domain_alloc_irqs_recursive(struct irq_domain *domain,
+ unsigned int irq_base,
+ unsigned int nr_irqs, void *arg);
extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain,
unsigned int virq,
irq_hw_number_t hwirq,
@@ -410,6 +418,11 @@ static inline bool irq_domain_is_hierarchy(struct irq_domain *domain)
static inline void irq_dispose_mapping(unsigned int virq) { }
static inline void irq_domain_activate_irq(struct irq_data *data) { }
static inline void irq_domain_deactivate_irq(struct irq_data *data) { }
+static inline struct irq_domain *irq_find_matching_fwnode(
+ struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token)
+{
+ return NULL;
+}
#endif /* !CONFIG_IRQ_DOMAIN */
#endif /* _LINUX_IRQDOMAIN_H */
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 8dde55974f18..0536524bb9eb 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -5,7 +5,7 @@
* Jump label support
*
* Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com>
- * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra
*
* DEPRECATED API:
*
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 350dfb08aee3..7311c3294e25 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -255,6 +255,7 @@ extern long (*panic_blink)(int state);
__printf(1, 2)
void panic(const char *fmt, ...)
__noreturn __cold;
+void nmi_panic_self_stop(struct pt_regs *);
extern void oops_enter(void);
extern void oops_exit(void);
void print_oops_end_marker(void);
@@ -446,6 +447,33 @@ extern int sysctl_panic_on_stackoverflow;
extern bool crash_kexec_post_notifiers;
/*
+ * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It
+ * holds a CPU number which is executing panic() currently. A value of
+ * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec().
+ */
+extern atomic_t panic_cpu;
+#define PANIC_CPU_INVALID -1
+
+/*
+ * A variant of panic() called from NMI context. We return if we've already
+ * panicked on this CPU. If another CPU already panicked, loop in
+ * nmi_panic_self_stop() which can provide architecture dependent code such
+ * as saving register state for crash dump.
+ */
+#define nmi_panic(regs, fmt, ...) \
+do { \
+ int old_cpu, cpu; \
+ \
+ cpu = raw_smp_processor_id(); \
+ old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu); \
+ \
+ if (old_cpu == PANIC_CPU_INVALID) \
+ panic(fmt, ##__VA_ARGS__); \
+ else if (old_cpu != cpu) \
+ nmi_panic_self_stop(regs); \
+} while (0)
+
+/*
* Only to be used by arch init code. If the user over-wrote the default
* CONFIG_PANIC_TIMEOUT, honor it.
*/
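
The panic_cpu handshake is a plain "first CPU wins" protocol: one cmpxchg decides who runs panic(), re-entry on the winning CPU simply returns, and every other CPU parks itself. A minimal user-space sketch of the same pattern, with hypothetical names and C11 atomics standing in for the kernel's atomic_t:

	#include <stdatomic.h>

	#define PANIC_CPU_INVALID	-1

	static atomic_int panic_cpu = ATOMIC_VAR_INIT(PANIC_CPU_INVALID);

	/* Returns 1 if this caller won the race and should run panic(). */
	static int try_enter_panic(int cpu)
	{
		int old = PANIC_CPU_INVALID;

		/* Only one caller can move panic_cpu off PANIC_CPU_INVALID. */
		if (atomic_compare_exchange_strong(&panic_cpu, &old, cpu))
			return 1;	/* we panicked first */
		if (old == cpu)
			return 0;	/* nested panic on this CPU: just return */
		for (;;)
			;		/* another CPU panics: spin, as nmi_panic_self_stop() does */
	}
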
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index d140b1e9faa7..7b68d2788a56 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -237,6 +237,7 @@ extern int kexec_purgatory_get_set_symbol(struct kimage *image,
unsigned int size, bool get_value);
extern void *kexec_purgatory_get_symbol_addr(struct kimage *image,
const char *name);
+extern void __crash_kexec(struct pt_regs *);
extern void crash_kexec(struct pt_regs *);
int kexec_should_crash(struct task_struct *);
void crash_save_cpu(struct pt_regs *regs, int cpu);
@@ -332,6 +333,7 @@ int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
#else /* !CONFIG_KEXEC_CORE */
struct pt_regs;
struct task_struct;
+static inline void __crash_kexec(struct pt_regs *regs) { }
static inline void crash_kexec(struct pt_regs *regs) { }
static inline int kexec_should_crash(struct task_struct *p) { return 0; }
#define kexec_in_progress false
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index d0a1f99e24e3..4894c6888bc6 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -25,7 +25,7 @@
#ifdef CONFIG_DEBUG_KMEMLEAK
-extern void kmemleak_init(void) __ref;
+extern void kmemleak_init(void) __init;
extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
gfp_t gfp) __ref;
extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
diff --git a/include/linux/kref.h b/include/linux/kref.h
index 484604d184be..e15828fd71f1 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -19,7 +19,6 @@
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
-#include <linux/spinlock.h>
struct kref {
atomic_t refcount;
@@ -99,38 +98,6 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)
return kref_sub(kref, 1, release);
}
-/**
- * kref_put_spinlock_irqsave - decrement refcount for object.
- * @kref: object.
- * @release: pointer to the function that will clean up the object when the
- * last reference to the object is released.
- * This pointer is required, and it is not acceptable to pass kfree
- * in as this function.
- * @lock: lock to take in release case
- *
- * Behaves identical to kref_put with one exception. If the reference count
- * drops to zero, the lock will be taken atomically wrt dropping the reference
- * count. The release function has to call spin_unlock() without _irqrestore.
- */
-static inline int kref_put_spinlock_irqsave(struct kref *kref,
- void (*release)(struct kref *kref),
- spinlock_t *lock)
-{
- unsigned long flags;
-
- WARN_ON(release == NULL);
- if (atomic_add_unless(&kref->refcount, -1, 1))
- return 0;
- spin_lock_irqsave(lock, flags);
- if (atomic_dec_and_test(&kref->refcount)) {
- release(kref);
- local_irq_restore(flags);
- return 1;
- }
- spin_unlock_irqrestore(lock, flags);
- return 0;
-}
-
static inline int kref_put_mutex(struct kref *kref,
void (*release)(struct kref *kref),
struct mutex *lock)
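
With kref_put_spinlock_irqsave() gone, a caller that still needs the release callback invoked with a spinlock held atomically wrt the count reaching zero can open-code the removed sequence. A sketch with hypothetical my_obj/my_lock names, keeping the old semantics (the release function drops the lock itself, without _irqrestore):

	static DEFINE_SPINLOCK(my_lock);

	struct my_obj {
		struct kref kref;
	};

	static void my_release(struct kref *kref);	/* calls spin_unlock(&my_lock) */

	static int my_put(struct my_obj *obj)
	{
		unsigned long flags;

		if (atomic_add_unless(&obj->kref.refcount, -1, 1))
			return 0;			/* fast path: count stayed above 1 */
		spin_lock_irqsave(&my_lock, flags);
		if (atomic_dec_and_test(&obj->kref.refcount)) {
			my_release(&obj->kref);		/* runs with my_lock held */
			local_irq_restore(flags);	/* release already dropped the lock */
			return 1;
		}
		spin_unlock_irqrestore(&my_lock, flags);
		return 0;
	}
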
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5706a2108f0a..c923350ca20a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -460,6 +460,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
(vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
idx++)
+static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ if (vcpu->vcpu_id == id)
+ return vcpu;
+ return NULL;
+}
+
#define kvm_for_each_memslot(memslot, slots) \
for (memslot = &slots->memslots[0]; \
memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 83577f8fd15b..600c1e0626a5 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -210,6 +210,7 @@ enum {
ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */
/* (doesn't imply presence) */
ATA_FLAG_SATA = (1 << 1),
+ ATA_FLAG_NO_LOG_PAGE = (1 << 5), /* do not issue log page read */
ATA_FLAG_NO_ATAPI = (1 << 6), /* No ATAPI support */
ATA_FLAG_PIO_DMA = (1 << 7), /* PIO cmds via DMA */
ATA_FLAG_PIO_LBA48 = (1 << 8), /* Host DMA engine is LBA28 only */
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 69c9057e1ab8..034117b3be5f 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -50,15 +50,21 @@ enum {
NVM_IO_DUAL_ACCESS = 0x1,
NVM_IO_QUAD_ACCESS = 0x2,
+ /* NAND Access Modes */
NVM_IO_SUSPEND = 0x80,
NVM_IO_SLC_MODE = 0x100,
NVM_IO_SCRAMBLE_DISABLE = 0x200,
+
+ /* Block Types */
+ NVM_BLK_T_FREE = 0x0,
+ NVM_BLK_T_BAD = 0x1,
+ NVM_BLK_T_DEV = 0x2,
+ NVM_BLK_T_HOST = 0x4,
};
struct nvm_id_group {
u8 mtype;
u8 fmtype;
- u16 res16;
u8 num_ch;
u8 num_lun;
u8 num_pln;
@@ -74,9 +80,9 @@ struct nvm_id_group {
u32 tbet;
u32 tbem;
u32 mpos;
+ u32 mccap;
u16 cpar;
- u8 res[913];
-} __packed;
+};
struct nvm_addr_format {
u8 ch_offset;
@@ -91,19 +97,15 @@ struct nvm_addr_format {
u8 pg_len;
u8 sect_offset;
u8 sect_len;
- u8 res[4];
};
struct nvm_id {
u8 ver_id;
u8 vmnt;
u8 cgrps;
- u8 res[5];
u32 cap;
u32 dom;
struct nvm_addr_format ppaf;
- u8 ppat;
- u8 resv[224];
struct nvm_id_group groups[4];
} __packed;
@@ -123,39 +125,28 @@ struct nvm_tgt_instance {
#define NVM_VERSION_MINOR 0
#define NVM_VERSION_PATCH 0
-#define NVM_SEC_BITS (8)
-#define NVM_PL_BITS (6)
-#define NVM_PG_BITS (16)
#define NVM_BLK_BITS (16)
-#define NVM_LUN_BITS (10)
+#define NVM_PG_BITS (16)
+#define NVM_SEC_BITS (8)
+#define NVM_PL_BITS (8)
+#define NVM_LUN_BITS (8)
#define NVM_CH_BITS (8)
struct ppa_addr {
+ /* Generic structure for all addresses */
union {
- /* Channel-based PPA format in nand 4x2x2x2x8x10 */
- struct {
- u64 ch : 4;
- u64 sec : 2; /* 4 sectors per page */
- u64 pl : 2; /* 4 planes per LUN */
- u64 lun : 2; /* 4 LUNs per channel */
- u64 pg : 8; /* 256 pages per block */
- u64 blk : 10;/* 1024 blocks per plane */
- u64 resved : 36;
- } chnl;
-
- /* Generic structure for all addresses */
struct {
+ u64 blk : NVM_BLK_BITS;
+ u64 pg : NVM_PG_BITS;
u64 sec : NVM_SEC_BITS;
u64 pl : NVM_PL_BITS;
- u64 pg : NVM_PG_BITS;
- u64 blk : NVM_BLK_BITS;
u64 lun : NVM_LUN_BITS;
u64 ch : NVM_CH_BITS;
} g;
u64 ppa;
};
-} __packed;
+};
struct nvm_rq {
struct nvm_tgt_instance *ins;
@@ -191,18 +182,18 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata)
struct nvm_block;
typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
-typedef int (nvm_bb_update_fn)(u32, void *, unsigned int, void *);
-typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *);
-typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32,
+typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *);
+typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *);
+typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32,
nvm_l2p_update_fn *, void *);
-typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, int, unsigned int,
+typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int,
nvm_bb_update_fn *, void *);
-typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int);
-typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *);
-typedef int (nvm_erase_blk_fn)(struct request_queue *, struct nvm_rq *);
-typedef void *(nvm_create_dma_pool_fn)(struct request_queue *, char *);
+typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int);
+typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
+typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *);
+typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
typedef void (nvm_destroy_dma_pool_fn)(void *);
-typedef void *(nvm_dev_dma_alloc_fn)(struct request_queue *, void *, gfp_t,
+typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
dma_addr_t *);
typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
@@ -210,7 +201,7 @@ struct nvm_dev_ops {
nvm_id_fn *identity;
nvm_get_l2p_tbl_fn *get_l2p_tbl;
nvm_op_bb_tbl_fn *get_bb_tbl;
- nvm_op_set_bb_fn *set_bb;
+ nvm_op_set_bb_fn *set_bb_tbl;
nvm_submit_io_fn *submit_io;
nvm_erase_blk_fn *erase_block;
@@ -220,7 +211,7 @@ struct nvm_dev_ops {
nvm_dev_dma_alloc_fn *dev_dma_alloc;
nvm_dev_dma_free_fn *dev_dma_free;
- uint8_t max_phys_sect;
+ unsigned int max_phys_sect;
};
struct nvm_lun {
@@ -229,7 +220,9 @@ struct nvm_lun {
int lun_id;
int chnl_id;
+ unsigned int nr_inuse_blocks; /* Number of used blocks */
unsigned int nr_free_blocks; /* Number of unused blocks */
+ unsigned int nr_bad_blocks; /* Number of bad blocks */
struct nvm_block *blocks;
spinlock_t lock;
@@ -263,8 +256,7 @@ struct nvm_dev {
int blks_per_lun;
int sec_size;
int oob_size;
- int addr_mode;
- struct nvm_addr_format addr_format;
+ struct nvm_addr_format ppaf;
/* Calculated/Cached values. These do not reflect the actual usable
* blocks at run-time.
@@ -290,118 +282,45 @@ struct nvm_dev {
char name[DISK_NAME_LEN];
};
-/* fallback conversion */
-static struct ppa_addr __generic_to_linear_addr(struct nvm_dev *dev,
- struct ppa_addr r)
+static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
+ struct ppa_addr r)
{
struct ppa_addr l;
- l.ppa = r.g.sec +
- r.g.pg * dev->sec_per_pg +
- r.g.blk * (dev->pgs_per_blk *
- dev->sec_per_pg) +
- r.g.lun * (dev->blks_per_lun *
- dev->pgs_per_blk *
- dev->sec_per_pg) +
- r.g.ch * (dev->blks_per_lun *
- dev->pgs_per_blk *
- dev->luns_per_chnl *
- dev->sec_per_pg);
+ l.ppa = ((u64)r.g.blk) << dev->ppaf.blk_offset;
+ l.ppa |= ((u64)r.g.pg) << dev->ppaf.pg_offset;
+ l.ppa |= ((u64)r.g.sec) << dev->ppaf.sect_offset;
+ l.ppa |= ((u64)r.g.pl) << dev->ppaf.pln_offset;
+ l.ppa |= ((u64)r.g.lun) << dev->ppaf.lun_offset;
+ l.ppa |= ((u64)r.g.ch) << dev->ppaf.ch_offset;
return l;
}
-/* fallback conversion */
-static struct ppa_addr __linear_to_generic_addr(struct nvm_dev *dev,
- struct ppa_addr r)
+static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev,
+ struct ppa_addr r)
{
struct ppa_addr l;
- int secs, pgs, blks, luns;
- sector_t ppa = r.ppa;
-
- l.ppa = 0;
-
- div_u64_rem(ppa, dev->sec_per_pg, &secs);
- l.g.sec = secs;
- sector_div(ppa, dev->sec_per_pg);
- div_u64_rem(ppa, dev->sec_per_blk, &pgs);
- l.g.pg = pgs;
-
- sector_div(ppa, dev->pgs_per_blk);
- div_u64_rem(ppa, dev->blks_per_lun, &blks);
- l.g.blk = blks;
-
- sector_div(ppa, dev->blks_per_lun);
- div_u64_rem(ppa, dev->luns_per_chnl, &luns);
- l.g.lun = luns;
-
- sector_div(ppa, dev->luns_per_chnl);
- l.g.ch = ppa;
-
- return l;
-}
-
-static struct ppa_addr __generic_to_chnl_addr(struct ppa_addr r)
-{
- struct ppa_addr l;
-
- l.ppa = 0;
-
- l.chnl.sec = r.g.sec;
- l.chnl.pl = r.g.pl;
- l.chnl.pg = r.g.pg;
- l.chnl.blk = r.g.blk;
- l.chnl.lun = r.g.lun;
- l.chnl.ch = r.g.ch;
-
- return l;
-}
-
-static struct ppa_addr __chnl_to_generic_addr(struct ppa_addr r)
-{
- struct ppa_addr l;
-
- l.ppa = 0;
-
- l.g.sec = r.chnl.sec;
- l.g.pl = r.chnl.pl;
- l.g.pg = r.chnl.pg;
- l.g.blk = r.chnl.blk;
- l.g.lun = r.chnl.lun;
- l.g.ch = r.chnl.ch;
+ /*
+ * Each field is (r.ppa >> X offset) & X-len bitmask, where X is blk, pg, etc.
+ */
+ l.g.blk = (r.ppa >> dev->ppaf.blk_offset) &
+ (((1 << dev->ppaf.blk_len) - 1));
+ l.g.pg = (r.ppa >> dev->ppaf.pg_offset) &
+ (((1 << dev->ppaf.pg_len) - 1));
+ l.g.sec = (r.ppa >> dev->ppaf.sect_offset) &
+ (((1 << dev->ppaf.sect_len) - 1));
+ l.g.pl = (r.ppa >> dev->ppaf.pln_offset) &
+ (((1 << dev->ppaf.pln_len) - 1));
+ l.g.lun = (r.ppa >> dev->ppaf.lun_offset) &
+ (((1 << dev->ppaf.lun_len) - 1));
+ l.g.ch = (r.ppa >> dev->ppaf.ch_offset) &
+ (((1 << dev->ppaf.ch_len) - 1));
return l;
}
-static inline struct ppa_addr addr_to_generic_mode(struct nvm_dev *dev,
- struct ppa_addr gppa)
-{
- switch (dev->addr_mode) {
- case NVM_ADDRMODE_LINEAR:
- return __linear_to_generic_addr(dev, gppa);
- case NVM_ADDRMODE_CHANNEL:
- return __chnl_to_generic_addr(gppa);
- default:
- BUG();
- }
- return gppa;
-}
-
-static inline struct ppa_addr generic_to_addr_mode(struct nvm_dev *dev,
- struct ppa_addr gppa)
-{
- switch (dev->addr_mode) {
- case NVM_ADDRMODE_LINEAR:
- return __generic_to_linear_addr(dev, gppa);
- case NVM_ADDRMODE_CHANNEL:
- return __generic_to_chnl_addr(gppa);
- default:
- BUG();
- }
- return gppa;
-}
-
static inline int ppa_empty(struct ppa_addr ppa_addr)
{
return (ppa_addr.ppa == ADDR_EMPTY);
@@ -468,7 +387,7 @@ typedef int (nvmm_end_io_fn)(struct nvm_rq *, int);
typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
unsigned long);
typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
-typedef void (nvmm_free_blocks_print_fn)(struct nvm_dev *);
+typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
struct nvmm_type {
const char *name;
@@ -492,7 +411,7 @@ struct nvmm_type {
nvmm_get_lun_fn *get_lun;
/* Statistics */
- nvmm_free_blocks_print_fn *free_blocks_print;
+ nvmm_lun_info_print_fn *lun_info_print;
struct list_head list;
};
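
The device now reports where each field lives via its nvm_addr_format, and the two helpers above just shift each generic field into (or out of) its device-specific slot. A worked sketch with hypothetical offsets, blk at bit 0 (16 bits), pg at bit 16 (16 bits) and sec at bit 32 (8 bits):

	static void ppa_pack_demo(void)
	{
		u64 blk = 3, pg = 200, sec = 1;
		u64 ppa = 0;

		ppa |= blk << 0;			/* generic_to_dev_addr() direction */
		ppa |= pg << 16;
		ppa |= sec << 32;

		blk = (ppa >> 0) & ((1 << 16) - 1);	/* dev_to_generic_addr() direction */
		pg = (ppa >> 16) & ((1 << 16) - 1);
		sec = (ppa >> 32) & ((1 << 8) - 1);
	}
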
diff --git a/include/linux/list.h b/include/linux/list.h
index 993395a2e55c..5356f4d661a7 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -24,7 +24,7 @@
static inline void INIT_LIST_HEAD(struct list_head *list)
{
- list->next = list;
+ WRITE_ONCE(list->next, list);
list->prev = list;
}
@@ -42,7 +42,7 @@ static inline void __list_add(struct list_head *new,
next->prev = new;
new->next = next;
new->prev = prev;
- prev->next = new;
+ WRITE_ONCE(prev->next, new);
}
#else
extern void __list_add(struct list_head *new,
@@ -186,7 +186,7 @@ static inline int list_is_last(const struct list_head *list,
*/
static inline int list_empty(const struct list_head *head)
{
- return head->next == head;
+ return READ_ONCE(head->next) == head;
}
/**
@@ -608,7 +608,7 @@ static inline int hlist_unhashed(const struct hlist_node *h)
static inline int hlist_empty(const struct hlist_head *h)
{
- return !h->first;
+ return !READ_ONCE(h->first);
}
static inline void __hlist_del(struct hlist_node *n)
@@ -642,7 +642,7 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
n->next = first;
if (first)
first->pprev = &n->next;
- h->first = n;
+ WRITE_ONCE(h->first, n);
n->pprev = &h->first;
}
@@ -653,14 +653,14 @@ static inline void hlist_add_before(struct hlist_node *n,
n->pprev = next->pprev;
n->next = next;
next->pprev = &n->next;
- *(n->pprev) = n;
+ WRITE_ONCE(*(n->pprev), n);
}
static inline void hlist_add_behind(struct hlist_node *n,
struct hlist_node *prev)
{
n->next = prev->next;
- prev->next = n;
+ WRITE_ONCE(prev->next, n);
n->pprev = &prev->next;
if (n->next)
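
The WRITE_ONCE()/READ_ONCE() pairs above are what make an unlocked emptiness test well defined. A sketch of the intended usage, assuming a hypothetical consumer whose writers stay fully locked:

	struct work_queue {
		spinlock_t lock;
		struct list_head list;
	};

	static bool queue_has_work(struct work_queue *q)
	{
		/* Racy but well-defined peek: list_empty() now uses READ_ONCE(),
		 * so the head pointer is loaded exactly once and cannot be torn
		 * or refetched. Callers must still re-check under q->lock
		 * before actually consuming entries.
		 */
		return !list_empty(&q->list);
	}
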
diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
index 8132214e8efd..ee7229a6c06a 100644
--- a/include/linux/list_bl.h
+++ b/include/linux/list_bl.h
@@ -70,7 +70,7 @@ static inline void hlist_bl_set_first(struct hlist_bl_head *h,
static inline int hlist_bl_empty(const struct hlist_bl_head *h)
{
- return !((unsigned long)h->first & ~LIST_BL_LOCKMASK);
+ return !((unsigned long)READ_ONCE(h->first) & ~LIST_BL_LOCKMASK);
}
static inline void hlist_bl_add_head(struct hlist_bl_node *n,
diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
index 444d2b1313bd..b01fe1009084 100644
--- a/include/linux/list_nulls.h
+++ b/include/linux/list_nulls.h
@@ -57,7 +57,7 @@ static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h)
static inline int hlist_nulls_empty(const struct hlist_nulls_head *h)
{
- return is_a_nulls(h->first);
+ return is_a_nulls(READ_ONCE(h->first));
}
static inline void hlist_nulls_add_head(struct hlist_nulls_node *n,
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 70400dc7660f..c57e424d914b 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -2,7 +2,7 @@
* Runtime locking correctness validator
*
* Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
*
* see Documentation/locking/lockdep-design.txt for more details.
*/
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index e6982ac3200d..a57f0dfb6db7 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -16,6 +16,7 @@
#define MARVELL_PHY_ID_88E1318S 0x01410e90
#define MARVELL_PHY_ID_88E1116R 0x01410e40
#define MARVELL_PHY_ID_88E1510 0x01410dd0
+#define MARVELL_PHY_ID_88E1540 0x01410eb0
#define MARVELL_PHY_ID_88E3016 0x01410e60
/* struct phy_device dev_flags definitions */
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 7501626ab529..d3133be12d92 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -427,6 +427,17 @@ enum {
};
enum {
+ /*
+ * Max wqe size for rdma read is 512 bytes, so this
+ * limits our max_sge_rd as the wqe needs to fit:
+ * - ctrl segment (16 bytes)
+ * - rdma segment (16 bytes)
+ * - scatter elements (16 bytes each)
+ */
+ MLX4_MAX_SGE_RD = (512 - 16 - 16) / 16
+};
+
+enum {
MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14,
MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15,
MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE = 0x16,
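
Worked out, the new constant is MLX4_MAX_SGE_RD = (512 - 16 - 16) / 16 = 480 / 16 = 30 scatter entries per RDMA-read WQE.
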
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index dd2097455a2e..1565324eb620 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -453,26 +453,28 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
u8 lro_cap[0x1];
u8 lro_psh_flag[0x1];
u8 lro_time_stamp[0x1];
- u8 reserved_0[0x6];
+ u8 reserved_0[0x3];
+ u8 self_lb_en_modifiable[0x1];
+ u8 reserved_1[0x2];
u8 max_lso_cap[0x5];
- u8 reserved_1[0x4];
+ u8 reserved_2[0x4];
u8 rss_ind_tbl_cap[0x4];
- u8 reserved_2[0x3];
+ u8 reserved_3[0x3];
u8 tunnel_lso_const_out_ip_id[0x1];
- u8 reserved_3[0x2];
+ u8 reserved_4[0x2];
u8 tunnel_statless_gre[0x1];
u8 tunnel_stateless_vxlan[0x1];
- u8 reserved_4[0x20];
+ u8 reserved_5[0x20];
- u8 reserved_5[0x10];
+ u8 reserved_6[0x10];
u8 lro_min_mss_size[0x10];
- u8 reserved_6[0x120];
+ u8 reserved_7[0x120];
u8 lro_timer_supported_periods[4][0x20];
- u8 reserved_7[0x600];
+ u8 reserved_8[0x600];
};
struct mlx5_ifc_roce_cap_bits {
@@ -4051,9 +4053,11 @@ struct mlx5_ifc_modify_tis_in_bits {
};
struct mlx5_ifc_modify_tir_bitmask_bits {
- u8 reserved[0x20];
+ u8 reserved_0[0x20];
- u8 reserved1[0x1f];
+ u8 reserved_1[0x1b];
+ u8 self_lb_en[0x1];
+ u8 reserved_2[0x3];
u8 lro[0x1];
};
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 877ef226f90f..772362adf471 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -1,6 +1,7 @@
#ifndef LINUX_MM_DEBUG_H
#define LINUX_MM_DEBUG_H 1
+#include <linux/bug.h>
#include <linux/stringify.h>
struct page;
diff --git a/include/linux/msi.h b/include/linux/msi.h
index f71a25e5fd25..1c6342ab8c0e 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -174,6 +174,7 @@ struct msi_controller {
#include <asm/msi.h>
struct irq_domain;
+struct irq_domain_ops;
struct irq_chip;
struct device_node;
struct fwnode_handle;
@@ -279,6 +280,23 @@ struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode,
int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
irq_write_msi_msg_t write_msi_msg);
void platform_msi_domain_free_irqs(struct device *dev);
+
+/* When an MSI domain is used as an intermediate domain */
+int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *args);
+int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
+ int virq, int nvec, msi_alloc_info_t *args);
+struct irq_domain *
+platform_msi_create_device_domain(struct device *dev,
+ unsigned int nvec,
+ irq_write_msi_msg_t write_msi_msg,
+ const struct irq_domain_ops *ops,
+ void *host_data);
+int platform_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs);
+void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nvec);
+void *platform_msi_get_host_data(struct irq_domain *domain);
#endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */
#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index c8723b62c4cd..bc742dac7d3a 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -25,7 +25,7 @@
#define SNOR_MFR_MACRONIX CFI_MFR_MACRONIX
#define SNOR_MFR_SPANSION CFI_MFR_AMD
#define SNOR_MFR_SST CFI_MFR_SST
-#define SNOR_MFR_WINBOND 0xef
+#define SNOR_MFR_WINBOND 0xef /* Also used by some Spansion */
/*
* Note on opcode nomenclature: some opcodes have a format like
diff --git a/include/linux/net.h b/include/linux/net.h
index 70ac5e28e6b7..0b4ac7da583a 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -34,8 +34,12 @@ struct inode;
struct file;
struct net;
-#define SOCK_ASYNC_NOSPACE 0
-#define SOCK_ASYNC_WAITDATA 1
+/* Historically, SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA were located
+ * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected.
+ * Eventually all flags will be in sk->sk_wq_flags.
+ */
+#define SOCKWQ_ASYNC_NOSPACE 0
+#define SOCKWQ_ASYNC_WAITDATA 1
#define SOCK_NOSPACE 2
#define SOCK_PASSCRED 3
#define SOCK_PASSSEC 4
@@ -89,6 +93,7 @@ struct socket_wq {
/* Note: wait MUST be first field of socket_wq */
wait_queue_head_t wait;
struct fasync_struct *fasync_list;
+ unsigned long flags; /* %SOCKWQ_ASYNC_NOSPACE, etc */
struct rcu_head rcu;
} ____cacheline_aligned_in_smp;
@@ -96,7 +101,7 @@ struct socket_wq {
* struct socket - general BSD socket
* @state: socket state (%SS_CONNECTED, etc)
* @type: socket type (%SOCK_STREAM, etc)
- * @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc)
+ * @flags: socket flags (%SOCK_NOSPACE, etc)
* @ops: protocol specific socket operations
* @file: File back pointer for gc
* @sk: internal networking protocol agnostic socket representation
@@ -202,7 +207,7 @@ enum {
SOCK_WAKE_URG,
};
-int sock_wake_async(struct socket *sk, int how, int band);
+int sock_wake_async(struct socket_wq *sk_wq, int how, int band);
int sock_register(const struct net_proto_family *fam);
void sock_unregister(int family);
int __sock_create(struct net *net, int family, int type, int proto,
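
Since the async flags now live in struct socket_wq, testing them means going through the RCU-protected sk_wq pointer rather than sock->flags. A minimal sketch, assuming a hypothetical helper on the sock side:

	static bool sock_async_nospace(const struct sock *sk)
	{
		struct socket_wq *wq;
		bool ret;

		rcu_read_lock();
		wq = rcu_dereference(sk->sk_wq);
		ret = wq && test_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
		rcu_read_unlock();
		return ret;
	}
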
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d20891465247..3143c847bddb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1398,7 +1398,8 @@ enum netdev_priv_flags {
* @dma: DMA channel
* @mtu: Interface MTU value
* @type: Interface hardware type
- * @hard_header_len: Hardware header length
+ * @hard_header_len: Hardware header length, which is also the
+ * minimum size of a packet.
*
* @needed_headroom: Extra headroom the hardware may need, but not in all
* cases can this be guaranteed
@@ -2068,20 +2069,23 @@ struct pcpu_sw_netstats {
struct u64_stats_sync syncp;
};
-#define netdev_alloc_pcpu_stats(type) \
-({ \
- typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \
- if (pcpu_stats) { \
- int __cpu; \
- for_each_possible_cpu(__cpu) { \
- typeof(type) *stat; \
- stat = per_cpu_ptr(pcpu_stats, __cpu); \
- u64_stats_init(&stat->syncp); \
- } \
- } \
- pcpu_stats; \
+#define __netdev_alloc_pcpu_stats(type, gfp) \
+({ \
+ typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\
+ if (pcpu_stats) { \
+ int __cpu; \
+ for_each_possible_cpu(__cpu) { \
+ typeof(type) *stat; \
+ stat = per_cpu_ptr(pcpu_stats, __cpu); \
+ u64_stats_init(&stat->syncp); \
+ } \
+ } \
+ pcpu_stats; \
})
+#define netdev_alloc_pcpu_stats(type) \
+ __netdev_alloc_pcpu_stats(type, GFP_KERNEL)
+
#include <linux/notifier.h>
/* netdevice notifier chain. Please remember to update the rtnetlink
@@ -3854,6 +3858,11 @@ static inline bool netif_is_bridge_master(const struct net_device *dev)
return dev->priv_flags & IFF_EBRIDGE;
}
+static inline bool netif_is_bridge_port(const struct net_device *dev)
+{
+ return dev->priv_flags & IFF_BRIDGE_PORT;
+}
+
static inline bool netif_is_ovs_master(const struct net_device *dev)
{
return dev->priv_flags & IFF_OPENVSWITCH;
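
Typical usage of the allocator macros above from a driver's init path; a sketch assuming a hypothetical driver-private stats field:

	struct my_priv {
		struct pcpu_sw_netstats __percpu *stats;	/* hypothetical field */
	};

	static int my_ndo_init(struct net_device *dev)
	{
		struct my_priv *priv = netdev_priv(dev);

		/* GFP_KERNEL wrapper; __netdev_alloc_pcpu_stats(type, GFP_ATOMIC)
		 * is available for contexts that must not sleep.
		 */
		priv->stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
		if (!priv->stats)
			return -ENOMEM;
		return 0;
	}
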
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 48bb01edcf30..0e1f433cc4b7 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -421,7 +421,7 @@ extern void ip_set_free(void *members);
extern int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr);
extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr);
extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[],
- size_t len);
+ size_t len, size_t align);
extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext *ext);
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 249d1bb01e03..5646b24bfc64 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -14,7 +14,7 @@ struct nfnl_callback {
int (*call_rcu)(struct sock *nl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[]);
- int (*call_batch)(struct sock *nl, struct sk_buff *skb,
+ int (*call_batch)(struct net *net, struct sock *nl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[]);
const struct nla_policy *policy; /* netlink attribute policy */
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index 187feabe557c..5fcd375ef175 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -5,10 +5,13 @@
#include <linux/netdevice.h>
#ifdef CONFIG_NETFILTER_INGRESS
-static inline int nf_hook_ingress_active(struct sk_buff *skb)
+static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
{
- return nf_hook_list_active(&skb->dev->nf_hooks_ingress,
- NFPROTO_NETDEV, NF_NETDEV_INGRESS);
+#ifdef HAVE_JUMP_LABEL
+ if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
+ return false;
+#endif
+ return !list_empty(&skb->dev->nf_hooks_ingress);
}
static inline int nf_hook_ingress(struct sk_buff *skb)
@@ -16,8 +19,8 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
struct nf_hook_state state;
nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress,
- NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL,
- skb->dev, NULL, dev_net(skb->dev), NULL);
+ NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV,
+ skb->dev, NULL, NULL, dev_net(skb->dev), NULL);
return nf_hook_slow(skb, &state);
}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c0e961474a52..37a3d2981352 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -359,6 +359,7 @@ extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode);
extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
+extern int nfs_revalidate_mapping_rcu(struct inode *inode);
extern int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping);
extern int nfs_setattr(struct dentry *, struct iattr *);
extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 570d630f98ae..11bbae44f4cb 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -251,6 +251,7 @@ struct nfs4_layoutget {
struct nfs4_layoutget_res res;
struct rpc_cred *cred;
gfp_t gfp_flags;
+ long timeout;
};
struct nfs4_getdeviceinfo_args {
diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
index 36112cdd665a..b90d8ec57c1f 100644
--- a/include/linux/of_dma.h
+++ b/include/linux/of_dma.h
@@ -80,7 +80,7 @@ static inline int of_dma_router_register(struct device_node *np,
static inline struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
const char *name)
{
- return NULL;
+ return ERR_PTR(-ENODEV);
}
static inline struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 039f2eec49ce..1e0deb8e8494 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -46,12 +46,14 @@ extern int of_irq_get(struct device_node *dev, int index);
extern int of_irq_get_byname(struct device_node *dev, const char *name);
extern int of_irq_to_resource_table(struct device_node *dev,
struct resource *res, int nr_irqs);
+extern struct device_node *of_irq_find_parent(struct device_node *child);
extern struct irq_domain *of_msi_get_domain(struct device *dev,
struct device_node *np,
enum irq_domain_bus_token token);
extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev,
u32 rid);
extern void of_msi_configure(struct device *dev, struct device_node *np);
+u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in);
#else
static inline int of_irq_count(struct device_node *dev)
{
@@ -70,6 +72,11 @@ static inline int of_irq_to_resource_table(struct device_node *dev,
{
return 0;
}
+static inline void *of_irq_find_parent(struct device_node *child)
+{
+ return NULL;
+}
+
static inline struct irq_domain *of_msi_get_domain(struct device *dev,
struct device_node *np,
enum irq_domain_bus_token token)
@@ -84,6 +91,11 @@ static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev
static inline void of_msi_configure(struct device *dev, struct device_node *np)
{
}
+static inline u32 of_msi_map_rid(struct device *dev,
+ struct device_node *msi_np, u32 rid_in)
+{
+ return rid_in;
+}
#endif
#if defined(CONFIG_OF_IRQ) || defined(CONFIG_SPARC)
@@ -93,7 +105,6 @@ static inline void of_msi_configure(struct device *dev, struct device_node *np)
* so declare it here regardless of the CONFIG_OF_IRQ setting.
*/
extern unsigned int irq_of_parse_and_map(struct device_node *node, int index);
-u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in);
#else /* !CONFIG_OF && !CONFIG_SPARC */
static inline unsigned int irq_of_parse_and_map(struct device_node *dev,
@@ -101,12 +112,6 @@ static inline unsigned int irq_of_parse_and_map(struct device_node *dev,
{
return 0;
}
-
-static inline u32 of_msi_map_rid(struct device *dev,
- struct device_node *msi_np, u32 rid_in)
-{
- return rid_in;
-}
#endif /* !CONFIG_OF */
#endif /* __OF_IRQ_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index e828e7b4afec..d86378c226fb 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -412,9 +412,18 @@ struct pci_host_bridge {
void (*release_fn)(struct pci_host_bridge *);
void *release_data;
unsigned int ignore_reset_delay:1; /* for entire hierarchy */
+ /* Resource alignment requirements */
+ resource_size_t (*align_resource)(struct pci_dev *dev,
+ const struct resource *res,
+ resource_size_t start,
+ resource_size_t size,
+ resource_size_t align);
};
#define to_pci_host_bridge(n) container_of(n, struct pci_host_bridge, dev)
+
+struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
+
void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
void (*release_fn)(struct pci_host_bridge *),
void *release_data);
@@ -1937,6 +1946,16 @@ static inline struct irq_domain *
pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; }
#endif /* CONFIG_OF */
+#ifdef CONFIG_ACPI
+struct irq_domain *pci_host_bridge_acpi_msi_domain(struct pci_bus *bus);
+
+void
+pci_msi_register_fwnode_provider(struct fwnode_handle *(*fn)(struct device *));
+#else
+static inline struct irq_domain *
+pci_host_bridge_acpi_msi_domain(struct pci_bus *bus) { return NULL; }
+#endif
+
#ifdef CONFIG_EEH
static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev)
{
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d841d33bcdc9..f9828a48f16a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -697,9 +697,11 @@ struct perf_cgroup {
* if there is no cgroup event for the current CPU context.
*/
static inline struct perf_cgroup *
-perf_cgroup_from_task(struct task_struct *task)
+perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
{
- return container_of(task_css(task, perf_event_cgrp_id),
+ return container_of(task_css_check(task, perf_event_cgrp_id,
+ ctx ? lockdep_is_held(&ctx->lock)
+ : true),
struct perf_cgroup, css);
}
#endif /* CONFIG_CGROUP_PERF */
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index e2878baeb90e..4299f4ba03bd 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -72,7 +72,7 @@ struct edma_soc_info {
struct edma_rsv_info *rsv;
/* List of channels allocated for memcpy, terminated with -1 */
- s16 *memcpy_channels;
+ s32 *memcpy_channels;
s8 (*queue_priority_mapping)[2];
const s16 (*xbar_chans)[2];
diff --git a/include/linux/platform_data/irq-renesas-intc-irqpin.h b/include/linux/platform_data/irq-renesas-intc-irqpin.h
deleted file mode 100644
index e4cb911066a6..000000000000
--- a/include/linux/platform_data/irq-renesas-intc-irqpin.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Renesas INTC External IRQ Pin Driver
- *
- * Copyright (C) 2013 Magnus Damm
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __IRQ_RENESAS_INTC_IRQPIN_H__
-#define __IRQ_RENESAS_INTC_IRQPIN_H__
-
-struct renesas_intc_irqpin_config {
- unsigned int sense_bitfield_width;
- unsigned int irq_base;
- bool control_parent;
-};
-
-#endif /* __IRQ_RENESAS_INTC_IRQPIN_H__ */
diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h
index 6f14ee295822..e5e8ec40278d 100644
--- a/include/linux/posix_acl_xattr.h
+++ b/include/linux/posix_acl_xattr.h
@@ -9,16 +9,12 @@
#ifndef _POSIX_ACL_XATTR_H
#define _POSIX_ACL_XATTR_H
+#include <uapi/linux/xattr.h>
#include <linux/posix_acl.h>
-/* Extended attribute names */
-#define POSIX_ACL_XATTR_ACCESS "system.posix_acl_access"
-#define POSIX_ACL_XATTR_DEFAULT "system.posix_acl_default"
-
/* Supported ACL a_version fields */
#define POSIX_ACL_XATTR_VERSION 0x0002
-
/* An undefined entry e_id value */
#define ACL_UNDEFINED_ID (-1)
diff --git a/include/linux/proportions.h b/include/linux/proportions.h
index 5440f64d2942..21221338ad18 100644
--- a/include/linux/proportions.h
+++ b/include/linux/proportions.h
@@ -1,7 +1,7 @@
/*
* FLoating proportions
*
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
*
* This file contains the public data structure and API definitions.
*/
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index 6a4347639c03..1d1ba2c5ee7a 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -9,6 +9,8 @@
#ifndef __COMMON_HSI__
#define __COMMON_HSI__
+#define CORE_SPQE_PAGE_SIZE_BYTES 4096
+
#define FW_MAJOR_VERSION 8
#define FW_MINOR_VERSION 4
#define FW_REVISION_VERSION 2
diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h
index b920c3605c46..41b9049b57e2 100644
--- a/include/linux/qed/qed_chain.h
+++ b/include/linux/qed/qed_chain.h
@@ -111,7 +111,8 @@ static inline u16 qed_chain_get_elem_left(struct qed_chain *p_chain)
used = ((u32)0x10000u + (u32)(p_chain->prod_idx)) -
(u32)p_chain->cons_idx;
if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR)
- used -= (used / p_chain->elem_per_page);
+ used -= p_chain->prod_idx / p_chain->elem_per_page -
+ p_chain->cons_idx / p_chain->elem_per_page;
return p_chain->capacity - used;
}
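
The distinction shows up once producer and consumer sit on different pages. For instance, with elem_per_page = 8, prod_idx = 17 and cons_idx = 15: used is 2, and the old used / elem_per_page term subtracts 2 / 8 = 0, while the corrected form subtracts 17/8 - 15/8 = 2 - 1 = 1, i.e. one next-pointer element for the one page boundary actually crossed.
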
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 5ed540986019..14ec1652daf4 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -179,32 +179,31 @@ static inline void list_replace_rcu(struct list_head *old,
}
/**
- * list_splice_init_rcu - splice an RCU-protected list into an existing list.
+ * __list_splice_init_rcu - join an RCU-protected list into an existing list.
* @list: the RCU-protected list to splice
- * @head: the place in the list to splice the first list into
+ * @prev: points to the last element of the existing list
+ * @next: points to the first element of the existing list
* @sync: function to sync: synchronize_rcu(), synchronize_sched(), ...
*
- * @head can be RCU-read traversed concurrently with this function.
+ * The list pointed to by @prev and @next can be RCU-read traversed
+ * concurrently with this function.
*
* Note that this function blocks.
*
- * Important note: the caller must take whatever action is necessary to
- * prevent any other updates to @head. In principle, it is possible
- * to modify the list as soon as sync() begins execution.
- * If this sort of thing becomes necessary, an alternative version
- * based on call_rcu() could be created. But only if -really-
- * needed -- there is no shortage of RCU API members.
+ * Important note: the caller must take whatever action is necessary to prevent
+ * any other updates to the existing list. In principle, it is possible to
+ * modify the list as soon as sync() begins execution. If this sort of thing
+ * becomes necessary, an alternative version based on call_rcu() could be
+ * created. But only if -really- needed -- there is no shortage of RCU API
+ * members.
*/
-static inline void list_splice_init_rcu(struct list_head *list,
- struct list_head *head,
- void (*sync)(void))
+static inline void __list_splice_init_rcu(struct list_head *list,
+ struct list_head *prev,
+ struct list_head *next,
+ void (*sync)(void))
{
struct list_head *first = list->next;
struct list_head *last = list->prev;
- struct list_head *at = head->next;
-
- if (list_empty(list))
- return;
/*
* "first" and "last" tracking list, so initialize it. RCU readers
@@ -231,10 +230,40 @@ static inline void list_splice_init_rcu(struct list_head *list,
* this function.
*/
- last->next = at;
- rcu_assign_pointer(list_next_rcu(head), first);
- first->prev = head;
- at->prev = last;
+ last->next = next;
+ rcu_assign_pointer(list_next_rcu(prev), first);
+ first->prev = prev;
+ next->prev = last;
+}
+
+/**
+ * list_splice_init_rcu - splice an RCU-protected list into an existing list,
+ * designed for stacks.
+ * @list: the RCU-protected list to splice
+ * @head: the place in the existing list to splice the first list into
+ * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ...
+ */
+static inline void list_splice_init_rcu(struct list_head *list,
+ struct list_head *head,
+ void (*sync)(void))
+{
+ if (!list_empty(list))
+ __list_splice_init_rcu(list, head, head->next, sync);
+}
+
+/**
+ * list_splice_tail_init_rcu - splice an RCU-protected list into an existing
+ * list, designed for queues.
+ * @list: the RCU-protected list to splice
+ * @head: the place in the existing list to splice the first list into
+ * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ...
+ */
+static inline void list_splice_tail_init_rcu(struct list_head *list,
+ struct list_head *head,
+ void (*sync)(void))
+{
+ if (!list_empty(list))
+ __list_splice_init_rcu(list, head->prev, head, sync);
}
/**
@@ -305,6 +334,42 @@ static inline void list_splice_init_rcu(struct list_head *list,
pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
/**
+ * list_entry_lockless - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_head within the struct.
+ *
+ * This primitive may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu(), but requires some implicit RCU
+ * read-side guarding. One example is running within a special
+ * exception-time environment where preemption is disabled and where
+ * lockdep cannot be invoked (in which case updaters must use RCU-sched,
+ * as in synchronize_sched(), call_rcu_sched(), and friends). Another
+ * example is when items are added to the list, but never deleted.
+ */
+#define list_entry_lockless(ptr, type, member) \
+ container_of((typeof(ptr))lockless_dereference(ptr), type, member)
+
+/**
+ * list_for_each_entry_lockless - iterate over rcu list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * This primitive may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu(), but requires some implicit RCU
+ * read-side guarding. One example is running within a special
+ * exception-time environment where preemption is disabled and where
+ * lockdep cannot be invoked (in which case updaters must use RCU-sched,
+ * as in synchronize_sched(), call_rcu_sched(), and friends). Another
+ * example is when items are added to the list, but never deleted.
+ */
+#define list_for_each_entry_lockless(pos, head, member) \
+ for (pos = list_entry_lockless((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry_lockless(pos->member.next, typeof(*pos), member))
+
+/**
* list_for_each_entry_continue_rcu - continue iteration over list of given type
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
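
Usage-wise, the split gives a head-splice for stack-like structures and a tail-splice for queues. A minimal updater-side sketch with hypothetical pending/active lists; note the sync() callback blocks, so a mutex (not a spinlock) must serialize updaters:

	static LIST_HEAD(pending);		/* updater-only staging list */
	static LIST_HEAD(active);		/* concurrently RCU-read list */
	static DEFINE_MUTEX(update_mutex);

	static void publish_pending(void)
	{
		mutex_lock(&update_mutex);
		/* FIFO: queued items go to the tail of the live list */
		list_splice_tail_init_rcu(&pending, &active, synchronize_rcu);
		mutex_unlock(&update_mutex);
	}
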
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index a0189ba67fde..14e6f47ee16f 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -48,10 +48,17 @@
#include <asm/barrier.h>
+#ifndef CONFIG_TINY_RCU
extern int rcu_expedited; /* for sysctl */
+extern int rcu_normal; /* also for sysctl */
+#endif /* #ifndef CONFIG_TINY_RCU */
#ifdef CONFIG_TINY_RCU
/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
+static inline bool rcu_gp_is_normal(void) /* Internal RCU use. */
+{
+ return true;
+}
static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */
{
return false;
@@ -65,6 +72,7 @@ static inline void rcu_unexpedite_gp(void)
{
}
#else /* #ifdef CONFIG_TINY_RCU */
+bool rcu_gp_is_normal(void); /* Internal RCU use. */
bool rcu_gp_is_expedited(void); /* Internal RCU use. */
void rcu_expedite_gp(void);
void rcu_unexpedite_gp(void);
@@ -321,7 +329,6 @@ static inline int rcu_preempt_depth(void)
/* Internal to kernel */
void rcu_init(void);
-void rcu_end_inkernel_boot(void);
void rcu_sched_qs(void);
void rcu_bh_qs(void);
void rcu_check_callbacks(int user);
@@ -329,6 +336,12 @@ struct notifier_block;
int rcu_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu);
+#ifndef CONFIG_TINY_RCU
+void rcu_end_inkernel_boot(void);
+#else /* #ifndef CONFIG_TINY_RCU */
+static inline void rcu_end_inkernel_boot(void) { }
+#endif /* #ifndef CONFIG_TINY_RCU */
+
#ifdef CONFIG_RCU_STALL_COMMON
void rcu_sysrq_start(void);
void rcu_sysrq_end(void);
@@ -379,9 +392,9 @@ static inline void rcu_init_nohz(void)
*/
#define RCU_NONIDLE(a) \
do { \
- rcu_irq_enter(); \
+ rcu_irq_enter_irqson(); \
do { a; } while (0); \
- rcu_irq_exit(); \
+ rcu_irq_exit_irqson(); \
} while (0)
/*
@@ -741,7 +754,7 @@ static inline void rcu_preempt_sleep_check(void)
* The tracing infrastructure traces RCU (we want that), but unfortunately
* some of the RCU checks causes tracing to lock up the system.
*
- * The tracing version of rcu_dereference_raw() must not call
+ * The no-tracing version of rcu_dereference_raw() must not call
* rcu_read_lock_held().
*/
#define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu)
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 4c1aaf9cce7b..64809aea661c 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -181,6 +181,14 @@ static inline void rcu_irq_enter(void)
{
}
+static inline void rcu_irq_exit_irqson(void)
+{
+}
+
+static inline void rcu_irq_enter_irqson(void)
+{
+}
+
static inline void rcu_irq_exit(void)
{
}
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 60d15a080d7c..ad1eda9fa4da 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -37,7 +37,7 @@ void rcu_cpu_stall_reset(void);
/*
* Note a virtualization-based context switch. This is simply a
* wrapper around rcu_note_context_switch(), which allows TINY_RCU
- * to save a few bytes.
+ * to save a few bytes. The caller must have disabled interrupts.
*/
static inline void rcu_virt_note_context_switch(int cpu)
{
@@ -97,6 +97,8 @@ void rcu_idle_enter(void);
void rcu_idle_exit(void);
void rcu_irq_enter(void);
void rcu_irq_exit(void);
+void rcu_irq_enter_irqson(void);
+void rcu_irq_exit_irqson(void);
void exit_rcu(void);
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 843ceca9a21e..e50b31d18462 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -19,6 +19,7 @@
#include <linux/atomic.h>
#include <linux/compiler.h>
+#include <linux/err.h>
#include <linux/errno.h>
#include <linux/jhash.h>
#include <linux/list_nulls.h>
@@ -339,10 +340,11 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
int rhashtable_init(struct rhashtable *ht,
const struct rhashtable_params *params);
-int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
- struct rhash_head *obj,
- struct bucket_table *old_tbl);
-int rhashtable_insert_rehash(struct rhashtable *ht);
+struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
+ const void *key,
+ struct rhash_head *obj,
+ struct bucket_table *old_tbl);
+int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl);
int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter);
void rhashtable_walk_exit(struct rhashtable_iter *iter);
@@ -598,9 +600,11 @@ restart:
new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
if (unlikely(new_tbl)) {
- err = rhashtable_insert_slow(ht, key, obj, new_tbl);
- if (err == -EAGAIN)
+ tbl = rhashtable_insert_slow(ht, key, obj, new_tbl);
+ if (!IS_ERR_OR_NULL(tbl))
goto slow_path;
+
+ err = PTR_ERR(tbl);
goto out;
}
@@ -611,7 +615,7 @@ restart:
if (unlikely(rht_grow_above_100(ht, tbl))) {
slow_path:
spin_unlock_bh(lock);
- err = rhashtable_insert_rehash(ht);
+ err = rhashtable_insert_rehash(ht, tbl);
rcu_read_unlock();
if (err)
return err;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index edad7a43edea..4bae8ab3b893 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -177,9 +177,9 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
extern void calc_global_load(unsigned long ticks);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-extern void update_cpu_load_nohz(void);
+extern void update_cpu_load_nohz(int active);
#else
-static inline void update_cpu_load_nohz(void) { }
+static inline void update_cpu_load_nohz(int active) { }
#endif
extern unsigned long get_parent_ip(unsigned long addr);
@@ -377,6 +377,7 @@ extern void scheduler_tick(void);
extern void sched_show_task(struct task_struct *p);
#ifdef CONFIG_LOCKUP_DETECTOR
+extern void touch_softlockup_watchdog_sched(void);
extern void touch_softlockup_watchdog(void);
extern void touch_softlockup_watchdog_sync(void);
extern void touch_all_softlockup_watchdogs(void);
@@ -387,6 +388,9 @@ extern unsigned int softlockup_panic;
extern unsigned int hardlockup_panic;
void lockup_detector_init(void);
#else
+static inline void touch_softlockup_watchdog_sched(void)
+{
+}
static inline void touch_softlockup_watchdog(void)
{
}
@@ -1268,8 +1272,13 @@ struct sched_entity {
#endif
#ifdef CONFIG_SMP
- /* Per entity load average tracking */
- struct sched_avg avg;
+ /*
+ * Per entity load average tracking.
+ *
+ * Put into separate cache line so it does not
+ * collide with read-mostly values above.
+ */
+ struct sched_avg avg ____cacheline_aligned_in_smp;
#endif
};
@@ -1455,14 +1464,15 @@ struct task_struct {
/* Used for emulating ABI behavior of previous Linux versions */
unsigned int personality;
- unsigned in_execve:1; /* Tell the LSMs that the process is doing an
- * execve */
- unsigned in_iowait:1;
-
- /* Revert to default priority/policy when forking */
+ /* scheduler bits, serialized by scheduler locks */
unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1;
unsigned sched_migrated:1;
+ unsigned :0; /* force alignment to the next boundary */
+
+ /* unserialized, strictly 'current' */
+ unsigned in_execve:1; /* bit to tell LSMs we're in execve */
+ unsigned in_iowait:1;
#ifdef CONFIG_MEMCG
unsigned memcg_may_oom:1;
#endif
@@ -1519,11 +1529,14 @@ struct task_struct {
cputime_t gtime;
struct prev_cputime prev_cputime;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
- seqlock_t vtime_seqlock;
+ seqcount_t vtime_seqcount;
unsigned long long vtime_snap;
enum {
- VTIME_SLEEPING = 0,
+ /* Task is sleeping or running in a CPU with VTIME inactive */
+ VTIME_INACTIVE = 0,
+ /* Task runs in userspace in a CPU with VTIME active */
VTIME_USER,
+ /* Task runs in kernelspace in a CPU with VTIME active */
VTIME_SYS,
} vtime_snap_whence;
#endif
@@ -2002,7 +2015,8 @@ static inline int pid_alive(const struct task_struct *p)
}
/**
- * is_global_init - check if a task structure is init
+ * is_global_init - check if a task structure is init. Since init
+ * is free to have sub-threads, we need to check tgid.
* @tsk: Task structure to be checked.
*
* Check if a task structure is the first user space task the kernel created.
@@ -2011,7 +2025,7 @@ static inline int pid_alive(const struct task_struct *p)
*/
static inline int is_global_init(struct task_struct *tsk)
{
- return tsk->pid == 1;
+ return task_tgid_nr(tsk) == 1;
}
extern struct pid *cad_pid;
diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index efa931c5cef1..411b52e424e1 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -10,11 +10,17 @@
#ifdef CONFIG_GENERIC_SCHED_CLOCK
extern void sched_clock_postinit(void);
-#else
-static inline void sched_clock_postinit(void) { }
-#endif
extern void sched_clock_register(u64 (*read)(void), int bits,
unsigned long rate);
+#else
+static inline void sched_clock_postinit(void) { }
+
+static inline void sched_clock_register(u64 (*read)(void), int bits,
+ unsigned long rate)
+{
+}
+#endif
#endif
diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h
index 80af3cd35ae4..72ce932c69b2 100644
--- a/include/linux/scpi_protocol.h
+++ b/include/linux/scpi_protocol.h
@@ -71,7 +71,7 @@ struct scpi_ops {
int (*sensor_get_value)(u16, u32 *);
};
-#if IS_ENABLED(CONFIG_ARM_SCPI_PROTOCOL)
+#if IS_REACHABLE(CONFIG_ARM_SCPI_PROTOCOL)
struct scpi_ops *get_scpi_ops(void);
#else
static inline struct scpi_ops *get_scpi_ops(void) { return NULL; }
diff --git a/include/linux/signal.h b/include/linux/signal.h
index ab1e0392b5ac..92557bbce7e7 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -239,7 +239,6 @@ extern int sigprocmask(int, sigset_t *, sigset_t *);
extern void set_current_blocked(sigset_t *);
extern void __set_current_blocked(const sigset_t *);
extern int show_unhandled_signals;
-extern int sigsuspend(sigset_t *);
struct sigaction {
#ifndef __ARCH_HAS_IRIX_SIGACTION
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 7c82e3b307a3..2037a861e367 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -158,6 +158,24 @@ size_t ksize(const void *);
#endif
/*
+ * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
+ * Intended for arches that get misalignment faults even for 64 bit integer
+ * aligned buffers.
+ */
+#ifndef ARCH_SLAB_MINALIGN
+#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
+#endif
+
+/*
+ * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned
+ * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN
+ * aligned pointers.
+ */
+#define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
+#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
+#define __assume_page_alignment __assume_aligned(PAGE_SIZE)
+
+/*
* Kmalloc array related definitions
*/
@@ -286,8 +304,8 @@ static __always_inline int kmalloc_index(size_t size)
}
#endif /* !CONFIG_SLOB */
-void *__kmalloc(size_t size, gfp_t flags);
-void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags);
+void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment;
+void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment;
void kmem_cache_free(struct kmem_cache *, void *);
/*
@@ -298,11 +316,11 @@ void kmem_cache_free(struct kmem_cache *, void *);
* Note that interrupts must be enabled when calling these functions.
*/
void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
-bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
+int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
#ifdef CONFIG_NUMA
-void *__kmalloc_node(size_t size, gfp_t flags, int node);
-void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
+void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment;
+void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment;
#else
static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
@@ -316,12 +334,12 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f
#endif
#ifdef CONFIG_TRACING
-extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t);
+extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment;
#ifdef CONFIG_NUMA
extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
gfp_t gfpflags,
- int node, size_t size);
+ int node, size_t size) __assume_slab_alignment;
#else
static __always_inline void *
kmem_cache_alloc_node_trace(struct kmem_cache *s,
@@ -354,10 +372,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
}
#endif /* CONFIG_TRACING */
-extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
+extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment;
#ifdef CONFIG_TRACING
-extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
+extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment;
#else
static __always_inline void *
kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
@@ -482,15 +500,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
return __kmalloc_node(size, flags, node);
}
-/*
- * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
- * Intended for arches that get misalignment faults even for 64 bit integer
- * aligned buffers.
- */
-#ifndef ARCH_SLAB_MINALIGN
-#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
-#endif
-
struct memcg_cache_array {
struct rcu_head rcu;
struct kmem_cache *entries[0];
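Moving ARCH_SLAB_MINALIGN above the __assume_*_alignment macros lets the alignment attributes be attached to the allocator declarations below, so the compiler may assume aligned return values and vectorize accordingly. A standalone userspace analogue of what __assume_aligned expresses (illustrative only, not kernel code):

    #include <stdlib.h>

    /* GCC >= 4.9: callers may assume 16-byte-aligned results, enabling
     * aligned SIMD loads/stores without runtime checks.
     */
    __attribute__((assume_aligned(16)))
    void *my_alloc(size_t size)
    {
            /* C11 requires size to be a multiple of the alignment. */
            return aligned_alloc(16, size);
    }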
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 0adedca24c5b..3cc9632dcc2a 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -29,7 +29,7 @@ struct cpu_stop_work {
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg);
-void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
+bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
struct cpu_stop_work *work_buf);
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
@@ -65,7 +65,7 @@ static void stop_one_cpu_nowait_workfn(struct work_struct *work)
preempt_enable();
}
-static inline void stop_one_cpu_nowait(unsigned int cpu,
+static inline bool stop_one_cpu_nowait(unsigned int cpu,
cpu_stop_fn_t fn, void *arg,
struct cpu_stop_work *work_buf)
{
@@ -74,7 +74,10 @@ static inline void stop_one_cpu_nowait(unsigned int cpu,
work_buf->fn = fn;
work_buf->arg = arg;
schedule_work(&work_buf->work);
+ return true;
}
+
+ return false;
}
static inline int stop_cpus(const struct cpumask *cpumask,
@@ -99,7 +102,7 @@ static inline int try_stop_cpus(const struct cpumask *cpumask,
* grabbing every spinlock (and more). So the "read" side to such a
* lock is anything which disables preemption.
*/
-#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
+#if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU)
/**
* stop_machine: freeze the machine on all CPUs and run this function
@@ -118,7 +121,7 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus);
-#else /* CONFIG_STOP_MACHINE && CONFIG_SMP */
+#else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
static inline int stop_machine(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus)
@@ -137,5 +140,5 @@ static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
return stop_machine(fn, data, cpus);
}
-#endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */
+#endif /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
#endif /* _LINUX_STOP_MACHINE */
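With stop_one_cpu_nowait() returning bool, a caller can tell whether the stopper accepted the work and undo its bookkeeping otherwise. A hedged sketch of the calling pattern (names invented):

    static struct cpu_stop_work migration_work;

    static bool kick_migration(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
    {
            /* false means the target CPU's stopper is unavailable
             * (e.g. the CPU is going down), so nothing was queued.
             */
            if (!stop_one_cpu_nowait(cpu, fn, arg, &migration_work)) {
                    pr_debug("CPU%u stopper unavailable\n", cpu);
                    return false;
            }
            return true;
    }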
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a156b82dd14c..c2b66a277e98 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -524,7 +524,7 @@ asmlinkage long sys_chown(const char __user *filename,
asmlinkage long sys_lchown(const char __user *filename,
uid_t user, gid_t group);
asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);
-#ifdef CONFIG_UID16
+#ifdef CONFIG_HAVE_UID16
asmlinkage long sys_chown16(const char __user *filename,
old_uid_t user, old_gid_t group);
asmlinkage long sys_lchown16(const char __user *filename,
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 4014a59828fc..613c29bd6baf 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -438,7 +438,8 @@ static inline void thermal_zone_device_unregister(
static inline int thermal_zone_bind_cooling_device(
struct thermal_zone_device *tz, int trip,
struct thermal_cooling_device *cdev,
- unsigned long upper, unsigned long lower)
+ unsigned long upper, unsigned long lower,
+ unsigned int weight)
{ return -ENODEV; }
static inline int thermal_zone_unbind_cooling_device(
struct thermal_zone_device *tz, int trip,
diff --git a/include/linux/time.h b/include/linux/time.h
index beebe3a02d43..297f09f23896 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -125,6 +125,32 @@ static inline bool timeval_valid(const struct timeval *tv)
extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
+/*
+ * Check whether a timespec/timeval used to inject a time offset is valid.
+ * Offsets can be positive or negative. The value of the timeval/timespec
+ * is the sum of its fields, but *NOTE*: the field tv_usec/tv_nsec must
+ * always be non-negative.
+ */
+static inline bool timeval_inject_offset_valid(const struct timeval *tv)
+{
+ /* We don't check the tv_sec as it can be positive or negative */
+
+ /* Can't have more microseconds than a second */
+ if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC)
+ return false;
+ return true;
+}
+
+static inline bool timespec_inject_offset_valid(const struct timespec *ts)
+{
+ /* We don't check the tv_sec as it can be positive or negative */
+
+ /* Can't have more nanoseconds than a second */
+ if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
+ return false;
+ return true;
+}
+
#define CURRENT_TIME (current_kernel_time())
#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 })
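Both helpers encode the same invariant: an injected offset must be normalized, with the sub-second field in [0, 1s). A negative half second is therefore expressed through tv_sec, as in this hedged example:

    /* -0.5s is valid only in normalized form. */
    struct timespec ok  = { .tv_sec = -1, .tv_nsec = NSEC_PER_SEC / 2 };
    struct timespec bad = { .tv_sec =  0, .tv_nsec = -NSEC_PER_SEC / 2 };

    timespec_inject_offset_valid(&ok);   /* true */
    timespec_inject_offset_valid(&bad);  /* false: tv_nsec must be >= 0 */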
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
new file mode 100644
index 000000000000..e1ee97c713bf
--- /dev/null
+++ b/include/linux/tracepoint-defs.h
@@ -0,0 +1,27 @@
+#ifndef TRACEPOINT_DEFS_H
+#define TRACEPOINT_DEFS_H 1
+
+/*
+ * This file can be included directly by headers that only want to access
+ * tracepoint->key to guard out-of-line trace calls. Otherwise
+ * linux/tracepoint.h should be used.
+ */
+
+#include <linux/atomic.h>
+#include <linux/static_key.h>
+
+struct tracepoint_func {
+ void *func;
+ void *data;
+ int prio;
+};
+
+struct tracepoint {
+ const char *name; /* Tracepoint name */
+ struct static_key key;
+ void (*regfunc)(void);
+ void (*unregfunc)(void);
+ struct tracepoint_func __rcu *funcs;
+};
+
+#endif
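The split lets a header test a tracepoint's static key without dragging in linux/tracepoint.h and its rcupdate.h dependency. A hedged sketch of the intended use (tracepoint name illustrative):

    #include <linux/tracepoint-defs.h>

    extern struct tracepoint __tracepoint_example_event;

    static inline bool example_event_enabled(void)
    {
            /* Only the key is needed to guard the out-of-line call. */
            return static_key_false(&__tracepoint_example_event.key);
    }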
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 696a339c592c..78e8397a1800 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -17,26 +17,12 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/rcupdate.h>
-#include <linux/static_key.h>
+#include <linux/tracepoint-defs.h>
struct module;
struct tracepoint;
struct notifier_block;
-struct tracepoint_func {
- void *func;
- void *data;
- int prio;
-};
-
-struct tracepoint {
- const char *name; /* Tracepoint name */
- struct static_key key;
- void (*regfunc)(void);
- void (*unregfunc)(void);
- struct tracepoint_func __rcu *funcs;
-};
-
struct trace_enum_map {
const char *system;
const char *enum_string;
@@ -171,8 +157,8 @@ extern void syscall_unregfunc(void);
TP_PROTO(data_proto), \
TP_ARGS(data_args), \
TP_CONDITION(cond), \
- rcu_irq_enter(), \
- rcu_irq_exit()); \
+ rcu_irq_enter_irqson(), \
+ rcu_irq_exit_irqson()); \
}
#else
#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args)
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 5b04b0a5375b..5e31f1b99037 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -607,7 +607,7 @@ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops);
/* tty_audit.c */
#ifdef CONFIG_AUDIT
-extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data,
+extern void tty_audit_add_data(struct tty_struct *tty, const void *data,
size_t size, unsigned icanon);
extern void tty_audit_exit(void);
extern void tty_audit_fork(struct signal_struct *sig);
@@ -615,8 +615,8 @@ extern void tty_audit_tiocsti(struct tty_struct *tty, char ch);
extern void tty_audit_push(struct tty_struct *tty);
extern int tty_audit_push_current(void);
#else
-static inline void tty_audit_add_data(struct tty_struct *tty,
- unsigned char *data, size_t size, unsigned icanon)
+static inline void tty_audit_add_data(struct tty_struct *tty, const void *data,
+ size_t size, unsigned icanon)
{
}
static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch)
diff --git a/include/linux/types.h b/include/linux/types.h
index 70d8500bddf1..70dd3dfde631 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -35,7 +35,7 @@ typedef __kernel_gid16_t gid16_t;
typedef unsigned long uintptr_t;
-#ifdef CONFIG_UID16
+#ifdef CONFIG_HAVE_UID16
/* This is defined by include/asm-{arch}/posix_types.h */
typedef __kernel_old_uid_t old_uid_t;
typedef __kernel_old_gid_t old_gid_t;
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 0bdc72f36905..4a29c75b146e 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -21,7 +21,7 @@
* Authors:
* Srikar Dronamraju
* Jim Keniston
- * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra
*/
#include <linux/errno.h>
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 1f6526c76ee8..3a375d07d0dc 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -138,6 +138,7 @@ struct cdc_ncm_ctx {
};
u8 cdc_ncm_select_altsetting(struct usb_interface *intf);
+int cdc_ncm_change_mtu(struct net_device *net, int new_mtu);
int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags);
void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign);
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index 9948c874e3f1..1d0043dc34e4 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -47,4 +47,7 @@
/* device generates spurious wakeup, ignore remote wakeup capability */
#define USB_QUIRK_IGNORE_REMOTE_WAKEUP BIT(9)
+/* device can't handle Link Power Management */
+#define USB_QUIRK_NO_LPM BIT(10)
+
#endif /* __LINUX_USB_QUIRKS_H */
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 610a86a892b8..ddb440975382 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -44,9 +44,6 @@ struct vfio_device_ops {
void (*request)(void *device_data, unsigned int count);
};
-extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
-extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
-
extern int vfio_add_group_dev(struct device *dev,
const struct vfio_device_ops *ops,
void *device_data);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 5dbc8b0ee567..3e5d9075960f 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -176,11 +176,11 @@ extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp);
#define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d))
#ifdef CONFIG_SMP
-void __mod_zone_page_state(struct zone *, enum zone_stat_item item, int);
+void __mod_zone_page_state(struct zone *, enum zone_stat_item item, long);
void __inc_zone_page_state(struct page *, enum zone_stat_item);
void __dec_zone_page_state(struct page *, enum zone_stat_item);
-void mod_zone_page_state(struct zone *, enum zone_stat_item, int);
+void mod_zone_page_state(struct zone *, enum zone_stat_item, long);
void inc_zone_page_state(struct page *, enum zone_stat_item);
void dec_zone_page_state(struct page *, enum zone_stat_item);
@@ -205,7 +205,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
* The functions directly modify the zone and global counters.
*/
static inline void __mod_zone_page_state(struct zone *zone,
- enum zone_stat_item item, int delta)
+ enum zone_stat_item item, long delta)
{
zone_page_state_add(delta, zone, item);
}
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index c5165fd256f9..fa2196990f84 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -10,16 +10,27 @@
struct task_struct;
/*
- * vtime_accounting_enabled() definitions/declarations
+ * vtime_accounting_cpu_enabled() definitions/declarations
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-static inline bool vtime_accounting_enabled(void) { return true; }
+static inline bool vtime_accounting_cpu_enabled(void) { return true; }
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+/*
+ * Checks if vtime is enabled on some CPU. Cputime readers want to be careful
+ * in that case and compute the tickless cputime.
+ * For now vtime state is tied to context tracking. We might want to decouple
+ * those later if necessary.
+ */
static inline bool vtime_accounting_enabled(void)
{
- if (context_tracking_is_enabled()) {
+ return context_tracking_is_enabled();
+}
+
+static inline bool vtime_accounting_cpu_enabled(void)
+{
+ if (vtime_accounting_enabled()) {
if (context_tracking_cpu_is_enabled())
return true;
}
@@ -29,7 +40,7 @@ static inline bool vtime_accounting_enabled(void)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-static inline bool vtime_accounting_enabled(void) { return false; }
+static inline bool vtime_accounting_cpu_enabled(void) { return false; }
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
@@ -44,7 +55,7 @@ extern void vtime_task_switch(struct task_struct *prev);
extern void vtime_common_task_switch(struct task_struct *prev);
static inline void vtime_task_switch(struct task_struct *prev)
{
- if (vtime_accounting_enabled())
+ if (vtime_accounting_cpu_enabled())
vtime_common_task_switch(prev);
}
#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
@@ -59,7 +70,7 @@ extern void vtime_account_irq_enter(struct task_struct *tsk);
extern void vtime_common_account_irq_enter(struct task_struct *tsk);
static inline void vtime_account_irq_enter(struct task_struct *tsk)
{
- if (vtime_accounting_enabled())
+ if (vtime_accounting_cpu_enabled())
vtime_common_account_irq_enter(tsk);
}
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
@@ -78,7 +89,7 @@ extern void vtime_gen_account_irq_exit(struct task_struct *tsk);
static inline void vtime_account_irq_exit(struct task_struct *tsk)
{
- if (vtime_accounting_enabled())
+ if (vtime_accounting_cpu_enabled())
vtime_gen_account_irq_exit(tsk);
}
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 1e1bf9f963a9..d2f4ec7dba7c 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -102,6 +102,36 @@ init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
q->func = func;
}
+/**
+ * waitqueue_active -- locklessly test for waiters on the queue
+ * @q: the waitqueue to test for waiters
+ *
+ * returns true if the wait list is not empty
+ *
+ * NOTE: this function is lockless and requires care, incorrect usage _will_
+ * lead to sporadic and non-obvious failure.
+ *
+ * Use either while holding wait_queue_head_t::lock or when used for wakeups
+ * with an extra smp_mb() like:
+ *
+ * CPU0 - waker CPU1 - waiter
+ *
+ * for (;;) {
+ * @cond = true; prepare_to_wait(&wq, &wait, state);
+ * smp_mb(); // smp_mb() from set_current_state()
+ * if (waitqueue_active(wq)) if (@cond)
+ * wake_up(wq); break;
+ * schedule();
+ * }
+ * finish_wait(&wq, &wait);
+ *
+ * Because without the explicit smp_mb() it's possible for the
+ * waitqueue_active() load to get hoisted over the @cond store such that we'll
+ * observe an empty wait list while the waiter might not observe @cond.
+ *
+ * Also note that this 'optimization' trades a spin_lock() for an smp_mb(),
+ * which (when the lock is uncontended) are of roughly equal cost.
+ */
static inline int waitqueue_active(wait_queue_head_t *q)
{
return !list_empty(&q->task_list);
@@ -145,7 +175,7 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
list_del(&old->task_list);
}
-typedef int wait_bit_action_f(struct wait_bit_key *);
+typedef int wait_bit_action_f(struct wait_bit_key *, int mode);
void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
@@ -960,10 +990,10 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
} while (0)
-extern int bit_wait(struct wait_bit_key *);
-extern int bit_wait_io(struct wait_bit_key *);
-extern int bit_wait_timeout(struct wait_bit_key *);
-extern int bit_wait_io_timeout(struct wait_bit_key *);
+extern int bit_wait(struct wait_bit_key *, int);
+extern int bit_wait_io(struct wait_bit_key *, int);
+extern int bit_wait_timeout(struct wait_bit_key *, int);
+extern int bit_wait_io_timeout(struct wait_bit_key *, int);
/**
* wait_on_bit - wait for a bit to be cleared
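Passing the sleep mode into the bit-wait actions lets each action honor signals for interruptible and killable waits. A hedged sketch of an action under the new signature (modeled on what bit_wait() itself must now do):

    static int example_bit_wait(struct wait_bit_key *key, int mode)
    {
            schedule();
            /* With TASK_INTERRUPTIBLE or TASK_KILLABLE in @mode, abort
             * the wait rather than spin on a pending signal.
             */
            if (signal_pending_state(mode, current))
                    return -EINTR;
            return 0;
    }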
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 0197358f1e81..0e32bc71245e 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -618,4 +618,10 @@ static inline int workqueue_sysfs_register(struct workqueue_struct *wq)
{ return 0; }
#endif /* CONFIG_SYSFS */
+#ifdef CONFIG_WQ_WATCHDOG
+void wq_watchdog_touch(int cpu);
+#else /* CONFIG_WQ_WATCHDOG */
+static inline void wq_watchdog_touch(int cpu) { }
+#endif /* CONFIG_WQ_WATCHDOG */
+
#endif
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 89474b9d260c..4457541de3c9 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -19,12 +19,16 @@
struct inode;
struct dentry;
+/*
+ * struct xattr_handler: When @name is set, match attributes with exactly that
+ * name. When @prefix is set instead, match attributes with that prefix and
+ * with a non-empty suffix.
+ */
struct xattr_handler {
+ const char *name;
const char *prefix;
int flags; /* fs private flags */
- size_t (*list)(const struct xattr_handler *, struct dentry *dentry,
- char *list, size_t list_size, const char *name,
- size_t name_len);
+ bool (*list)(struct dentry *dentry);
int (*get)(const struct xattr_handler *, struct dentry *dentry,
const char *name, void *buffer, size_t size);
int (*set)(const struct xattr_handler *, struct dentry *dentry,
@@ -53,8 +57,11 @@ int generic_setxattr(struct dentry *dentry, const char *name, const void *value,
int generic_removexattr(struct dentry *dentry, const char *name);
ssize_t vfs_getxattr_alloc(struct dentry *dentry, const char *name,
char **xattr_value, size_t size, gfp_t flags);
-int vfs_xattr_cmp(struct dentry *dentry, const char *xattr_name,
- const char *value, size_t size, gfp_t flags);
+
+static inline const char *xattr_prefix(const struct xattr_handler *handler)
+{
+ return handler->prefix ?: handler->name;
+}
struct simple_xattrs {
struct list_head head;
@@ -95,8 +102,7 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
void *buffer, size_t size);
int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
const void *value, size_t size, int flags);
-int simple_xattr_remove(struct simple_xattrs *xattrs, const char *name);
-ssize_t simple_xattr_list(struct simple_xattrs *xattrs, char *buffer,
+ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer,
size_t size);
void simple_xattr_list_add(struct simple_xattrs *xattrs,
struct simple_xattr *new_xattr);
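Under the new matching rules each handler carries either an exact @name or a @prefix, and xattr_prefix() returns whichever applies when composing list output. A hedged example of both styles, with stub callbacks:

    static int example_get(const struct xattr_handler *handler,
                           struct dentry *dentry, const char *name,
                           void *buffer, size_t size)
    {
            return -EOPNOTSUPP;
    }

    static bool example_list(struct dentry *dentry)
    {
            return true;    /* new signature: just "list it or not" */
    }

    /* Matches exactly this one attribute name. */
    static const struct xattr_handler example_name_handler = {
            .name = "system.example",
            .get  = example_get,
    };

    /* Matches "user.<non-empty suffix>". */
    static const struct xattr_handler example_prefix_handler = {
            .prefix = "user.",
            .list   = example_list,
            .get    = example_get,
    };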
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index b36d837c701e..2a91a0561a47 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -62,6 +62,7 @@ struct unix_sock {
#define UNIX_GC_CANDIDATE 0
#define UNIX_GC_MAYBE_CYCLE 1
struct socket_wq peer_wq;
+ wait_queue_t peer_wake;
};
static inline struct unix_sock *unix_sk(const struct sock *sk)
diff --git a/include/net/dst.h b/include/net/dst.h
index 1279f9b09791..c7329dcd90cc 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -322,6 +322,39 @@ static inline void skb_dst_force(struct sk_buff *skb)
}
}
+/**
+ * dst_hold_safe - Take a reference on a dst if possible
+ * @dst: pointer to dst entry
+ *
+ * This helper returns false if it cannot safely
+ * take a reference on a dst.
+ */
+static inline bool dst_hold_safe(struct dst_entry *dst)
+{
+ if (dst->flags & DST_NOCACHE)
+ return atomic_inc_not_zero(&dst->__refcnt);
+ dst_hold(dst);
+ return true;
+}
+
+/**
+ * skb_dst_force_safe - makes sure skb dst is refcounted
+ * @skb: buffer
+ *
+ * If dst is not yet refcounted and not destroyed, grab a ref on it.
+ */
+static inline void skb_dst_force_safe(struct sk_buff *skb)
+{
+ if (skb_dst_is_noref(skb)) {
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (!dst_hold_safe(dst))
+ dst = NULL;
+
+ skb->_skb_refdst = (unsigned long)dst;
+ }
+}
+
/**
* __skb_tunnel_rx - prepare skb for rx reinsert
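dst_hold_safe() exists because a DST_NOCACHE entry may already be dying; atomic_inc_not_zero() refuses to revive a zero refcount, and the caller must then treat the route as gone. A hedged sketch of the bare pattern that skb_dst_force_safe() wraps:

    struct dst_entry *dst = skb_dst(skb);

    /* Before leaving the RCU section that keeps a noref dst alive: */
    if (dst && !dst_hold_safe(dst))
            dst = NULL;     /* refcount was zero: do not use this dst */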
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 2134e6d815bc..625bdf95d673 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -210,18 +210,37 @@ struct inet_sock {
#define IP_CMSG_ORIGDSTADDR BIT(6)
#define IP_CMSG_CHECKSUM BIT(7)
-/* SYNACK messages might be attached to request sockets.
+/**
+ * sk_to_full_sk - Access to a full socket
+ * @sk: pointer to a socket
+ *
+ * SYNACK messages might be attached to request sockets.
* Some places want to reach the listener in this case.
*/
-static inline struct sock *skb_to_full_sk(const struct sk_buff *skb)
+static inline struct sock *sk_to_full_sk(struct sock *sk)
{
- struct sock *sk = skb->sk;
-
+#ifdef CONFIG_INET
if (sk && sk->sk_state == TCP_NEW_SYN_RECV)
sk = inet_reqsk(sk)->rsk_listener;
+#endif
+ return sk;
+}
+
+/* sk_to_full_sk() variant with a const argument */
+static inline const struct sock *sk_const_to_full_sk(const struct sock *sk)
+{
+#ifdef CONFIG_INET
+ if (sk && sk->sk_state == TCP_NEW_SYN_RECV)
+ sk = ((const struct request_sock *)sk)->rsk_listener;
+#endif
return sk;
}
+static inline struct sock *skb_to_full_sk(const struct sk_buff *skb)
+{
+ return sk_to_full_sk(skb->sk);
+}
+
static inline struct inet_sock *inet_sk(const struct sock *sk)
{
return (struct inet_sock *)sk;
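Splitting sk_to_full_sk() out of skb_to_full_sk() lets code that holds only a struct sock reach the listener behind a request socket. A hedged usage sketch (the consumer is invented):

    struct sock *full_sk = sk_to_full_sk(sk);

    /* For a TCP_NEW_SYN_RECV request sock this is the listener. */
    if (full_sk && full_sk->sk_state == TCP_LISTEN)
            audit_listener(full_sk);    /* invented consumer */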
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 4a6009d4486b..235c7811a86a 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -78,6 +78,7 @@ void inet_initpeers(void) __init;
static inline void inetpeer_set_addr_v4(struct inetpeer_addr *iaddr, __be32 ip)
{
iaddr->a4.addr = ip;
+ iaddr->a4.vif = 0;
iaddr->family = AF_INET;
}
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index aaf9700fc9e5..fb961a576abe 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -167,7 +167,8 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
static inline u32 rt6_get_cookie(const struct rt6_info *rt)
{
- if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE))
+ if (rt->rt6i_flags & RTF_PCPU ||
+ (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from))
rt = (struct rt6_info *)(rt->dst.from);
return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 2bfb2ad2fab1..877f682989b8 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -133,27 +133,18 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
/*
* Store a destination cache entry in a socket
*/
-static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr)
+static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- struct rt6_info *rt = (struct rt6_info *) dst;
+ np->dst_cookie = rt6_get_cookie((struct rt6_info *)dst);
sk_setup_caps(sk, dst);
np->daddr_cache = daddr;
#ifdef CONFIG_IPV6_SUBTREES
np->saddr_cache = saddr;
#endif
- np->dst_cookie = rt6_get_cookie(rt);
-}
-
-static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
- struct in6_addr *daddr, struct in6_addr *saddr)
-{
- spin_lock(&sk->sk_dst_lock);
- __ip6_dst_store(sk, dst, daddr, saddr);
- spin_unlock(&sk->sk_dst_lock);
}
static inline bool ipv6_unicast_destination(const struct sk_buff *skb)
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index aaee6fa02cf1..ff788b665277 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -90,11 +90,12 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
if (net_xmit_eval(err) == 0) {
- struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+ struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats);
u64_stats_update_begin(&tstats->syncp);
tstats->tx_bytes += pkt_len;
tstats->tx_packets++;
u64_stats_update_end(&tstats->syncp);
+ put_cpu_ptr(tstats);
} else {
stats->tx_errors++;
stats->tx_aborted_errors++;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index f6dafec9102c..62a750a6a8f8 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -287,12 +287,13 @@ static inline void iptunnel_xmit_stats(int err,
struct pcpu_sw_netstats __percpu *stats)
{
if (err > 0) {
- struct pcpu_sw_netstats *tstats = this_cpu_ptr(stats);
+ struct pcpu_sw_netstats *tstats = get_cpu_ptr(stats);
u64_stats_update_begin(&tstats->syncp);
tstats->tx_bytes += err;
tstats->tx_packets++;
u64_stats_update_end(&tstats->syncp);
+ put_cpu_ptr(tstats);
} else if (err < 0) {
err_stats->tx_errors++;
err_stats->tx_aborted_errors++;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e1a10b0ac0b0..9a5c9f013784 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -205,6 +205,7 @@ extern rwlock_t ip6_ra_lock;
*/
struct ipv6_txoptions {
+ atomic_t refcnt;
/* Length of this structure */
int tot_len;
@@ -217,7 +218,7 @@ struct ipv6_txoptions {
struct ipv6_opt_hdr *dst0opt;
struct ipv6_rt_hdr *srcrt; /* Routing Header */
struct ipv6_opt_hdr *dst1opt;
-
+ struct rcu_head rcu;
/* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */
};
@@ -252,6 +253,24 @@ struct ipv6_fl_socklist {
struct rcu_head rcu;
};
+static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
+{
+ struct ipv6_txoptions *opt;
+
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt && !atomic_inc_not_zero(&opt->refcnt))
+ opt = NULL;
+ rcu_read_unlock();
+ return opt;
+}
+
+static inline void txopt_put(struct ipv6_txoptions *opt)
+{
+ if (opt && atomic_dec_and_test(&opt->refcnt))
+ kfree_rcu(opt, rcu);
+}
+
struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label);
struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
struct ip6_flowlabel *fl,
@@ -490,6 +509,7 @@ struct ip6_create_arg {
u32 user;
const struct in6_addr *src;
const struct in6_addr *dst;
+ int iif;
u8 ecn;
};
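With np->opt now refcounted and RCU-managed, readers take a short-lived reference instead of relying on the socket lock. The intended calling pattern, per the helpers above (hedged sketch):

    struct ipv6_txoptions *opt;

    opt = txopt_get(np);            /* NULL if unset or already being freed */
    if (opt) {
            /* ... read opt->srcrt, opt->dst0opt, ... */
            txopt_put(opt);         /* final put frees via kfree_rcu() */
    }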
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 774d85b2d5d9..5689a0c749f7 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -29,7 +29,7 @@ struct l3mdev_ops {
/* IPv4 ops */
struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev,
const struct flowi4 *fl4);
- void (*l3mdev_get_saddr)(struct net_device *dev,
+ int (*l3mdev_get_saddr)(struct net_device *dev,
struct flowi4 *fl4);
/* IPv6 ops */
@@ -112,10 +112,11 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
return rc;
}
-static inline void l3mdev_get_saddr(struct net *net, int ifindex,
- struct flowi4 *fl4)
+static inline int l3mdev_get_saddr(struct net *net, int ifindex,
+ struct flowi4 *fl4)
{
struct net_device *dev;
+ int rc = 0;
if (ifindex) {
@@ -124,11 +125,13 @@ static inline void l3mdev_get_saddr(struct net *net, int ifindex,
dev = dev_get_by_index_rcu(net, ifindex);
if (dev && netif_is_l3_master(dev) &&
dev->l3mdev_ops->l3mdev_get_saddr) {
- dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4);
+ rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4);
}
rcu_read_unlock();
}
+
+ return rc;
}
static inline struct dst_entry *l3mdev_get_rt6_dst(const struct net_device *dev,
@@ -200,9 +203,10 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
return false;
}
-static inline void l3mdev_get_saddr(struct net *net, int ifindex,
- struct flowi4 *fl4)
+static inline int l3mdev_get_saddr(struct net *net, int ifindex,
+ struct flowi4 *fl4)
{
+ return 0;
}
static inline
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 82045fca388b..760bc4d5a2cf 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2003,8 +2003,10 @@ enum ieee80211_hw_flags {
* it shouldn't be set.
*
* @max_tx_aggregation_subframes: maximum number of subframes in an
- * aggregate an HT driver will transmit, used by the peer as a
- * hint to size its reorder buffer.
+ * aggregate an HT driver will transmit. ADDBA will nonetheless
+ * advertise a constant value of 64, since some older APs can crash
+ * if the window size is smaller (one example is the LinkSys WRT120N
+ * with FW v1.0.07 build 002, Jun 18 2012).
*
* @offchannel_tx_hw_queue: HW queue ID to use for offchannel TX
* (if %IEEE80211_HW_QUEUE_CONTROL is set)
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index bf3937431030..2d8edaad29cb 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -181,8 +181,7 @@ void ndisc_cleanup(void);
int ndisc_rcv(struct sk_buff *skb);
void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
- const struct in6_addr *daddr, const struct in6_addr *saddr,
- struct sk_buff *oskb);
+ const struct in6_addr *daddr, const struct in6_addr *saddr);
void ndisc_send_rs(struct net_device *dev,
const struct in6_addr *saddr, const struct in6_addr *daddr);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index c9149cc0a02d..4bd7508bedc9 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -618,6 +618,8 @@ struct nft_expr_ops {
void (*eval)(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt);
+ int (*clone)(struct nft_expr *dst,
+ const struct nft_expr *src);
unsigned int size;
int (*init)(const struct nft_ctx *ctx,
@@ -660,10 +662,20 @@ void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr);
int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
const struct nft_expr *expr);
-static inline void nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
+static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
{
+ int err;
+
__module_get(src->ops->type->owner);
- memcpy(dst, src, src->ops->size);
+ if (src->ops->clone) {
+ dst->ops = src->ops;
+ err = src->ops->clone(dst, src);
+ if (err < 0)
+ return err;
+ } else {
+ memcpy(dst, src, src->ops->size);
+ }
+ return 0;
}
/**
diff --git a/include/net/route.h b/include/net/route.h
index ee81307863d5..a3b9ef74a389 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -283,7 +283,12 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
sport, dport, sk);
if (!src && oif) {
- l3mdev_get_saddr(net, oif, fl4);
+ int rc;
+
+ rc = l3mdev_get_saddr(net, oif, fl4);
+ if (rc < 0)
+ return ERR_PTR(rc);
+
src = fl4->saddr;
}
if (!dst || !src) {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 4c79ce8c1f92..b2a8e6338576 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -61,6 +61,9 @@ struct Qdisc {
*/
#define TCQ_F_WARN_NONWC (1 << 16)
#define TCQ_F_CPUSTATS 0x20 /* run using percpu statistics */
+#define TCQ_F_NOPARENT 0x40 /* root of its hierarchy :
+ * qdisc_tree_decrease_qlen() should stop.
+ */
u32 limit;
const struct Qdisc_ops *ops;
struct qdisc_size_table __rcu *stab;
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 495c87e367b3..eea9bdeecba2 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -775,10 +775,10 @@ struct sctp_transport {
hb_sent:1,
/* Is the Path MTU update pending on this transport */
- pmtu_pending:1;
+ pmtu_pending:1,
- /* Has this transport moved the ctsn since we last sacked */
- __u32 sack_generation;
+ /* Has this transport moved the ctsn since we last sacked */
+ sack_generation:1;
u32 dst_cookie;
struct flowi fl;
@@ -1482,19 +1482,20 @@ struct sctp_association {
prsctp_capable:1, /* Can peer do PR-SCTP? */
auth_capable:1; /* Is peer doing SCTP-AUTH? */
- /* Ack State : This flag indicates if the next received
+ /* sack_needed : This flag indicates if the next received
* : packet is to be responded to with a
- * : SACK. This is initializedto 0. When a packet
- * : is received it is incremented. If this value
+ * : SACK. This is initialized to 0. When a packet
+ * : is received sack_cnt is incremented. If this value
* : reaches 2 or more, a SACK is sent and the
* : value is reset to 0. Note: This is used only
* : when no DATA chunks are received out of
* : order. When DATA chunks are out of order,
* : SACK's are not delayed (see Section 6).
*/
- __u8 sack_needed; /* Do we need to sack the peer? */
+ __u8 sack_needed:1, /* Do we need to sack the peer? */
+ sack_generation:1,
+ zero_window_announced:1;
__u32 sack_cnt;
- __u32 sack_generation;
__u32 adaptation_ind; /* Adaptation Code point. */
diff --git a/include/net/sock.h b/include/net/sock.h
index bbf7c2cf15b4..14d3c0734007 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -254,7 +254,6 @@ struct cg_proto;
* @sk_wq: sock wait queue and async head
* @sk_rx_dst: receive input route used by early demux
* @sk_dst_cache: destination cache
- * @sk_dst_lock: destination cache lock
* @sk_policy: flow policy
* @sk_receive_queue: incoming packets
* @sk_wmem_alloc: transmit queue bytes committed
@@ -384,14 +383,16 @@ struct sock {
int sk_rcvbuf;
struct sk_filter __rcu *sk_filter;
- struct socket_wq __rcu *sk_wq;
-
+ union {
+ struct socket_wq __rcu *sk_wq;
+ struct socket_wq *sk_wq_raw;
+ };
#ifdef CONFIG_XFRM
- struct xfrm_policy *sk_policy[2];
+ struct xfrm_policy __rcu *sk_policy[2];
#endif
struct dst_entry *sk_rx_dst;
struct dst_entry __rcu *sk_dst_cache;
- spinlock_t sk_dst_lock;
+ /* Note: 32bit hole on 64bit arches */
atomic_t sk_wmem_alloc;
atomic_t sk_omem_alloc;
int sk_sndbuf;
@@ -403,6 +404,7 @@ struct sock {
sk_userlocks : 4,
sk_protocol : 8,
sk_type : 16;
+#define SK_PROTOCOL_MAX U8_MAX
kmemcheck_bitfield_end(flags);
int sk_wmem_queued;
gfp_t sk_allocation;
@@ -739,6 +741,8 @@ enum sock_flags {
SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
};
+#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
+
static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
{
nsk->sk_flags = osk->sk_flags;
@@ -813,7 +817,7 @@ void sk_stream_write_space(struct sock *sk);
static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
{
/* dont let skb dst not refcounted, we are going to leave rcu lock */
- skb_dst_force(skb);
+ skb_dst_force_safe(skb);
if (!sk->sk_backlog.tail)
sk->sk_backlog.head = skb;
@@ -2005,10 +2009,27 @@ static inline unsigned long sock_wspace(struct sock *sk)
return amt;
}
-static inline void sk_wake_async(struct sock *sk, int how, int band)
+/* Note:
+ * We use sk->sk_wq_raw from contexts that know this
+ * pointer is not NULL and cannot disappear/change.
+ */
+static inline void sk_set_bit(int nr, struct sock *sk)
+{
+ set_bit(nr, &sk->sk_wq_raw->flags);
+}
+
+static inline void sk_clear_bit(int nr, struct sock *sk)
+{
+ clear_bit(nr, &sk->sk_wq_raw->flags);
+}
+
+static inline void sk_wake_async(const struct sock *sk, int how, int band)
{
- if (sock_flag(sk, SOCK_FASYNC))
- sock_wake_async(sk->sk_socket, how, band);
+ if (sock_flag(sk, SOCK_FASYNC)) {
+ rcu_read_lock();
+ sock_wake_async(rcu_dereference(sk->sk_wq), how, band);
+ rcu_read_unlock();
+ }
}
/* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might
@@ -2226,6 +2247,31 @@ static inline bool sk_listener(const struct sock *sk)
return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
}
+/**
+ * sk_state_load - read sk->sk_state for lockless contexts
+ * @sk: socket pointer
+ *
+ * Paired with sk_state_store(). Used in places we do not hold socket lock :
+ * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ...
+ */
+static inline int sk_state_load(const struct sock *sk)
+{
+ return smp_load_acquire(&sk->sk_state);
+}
+
+/**
+ * sk_state_store - update sk->sk_state
+ * @sk: socket pointer
+ * @newstate: new state
+ *
+ * Paired with sk_state_load(). Should be used in contexts where
+ * state change might impact lockless readers.
+ */
+static inline void sk_state_store(struct sock *sk, int newstate)
+{
+ smp_store_release(&sk->sk_state, newstate);
+}
+
void sock_enable_timestamp(struct sock *sk, int flag);
int sock_get_timestamp(struct sock *, struct timeval __user *);
int sock_get_timestampns(struct sock *, struct timespec __user *);
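The acquire/release pair orders the state change against the fields a lockless reader inspects next. A hedged sketch of the pairing (the field choice is illustrative):

    /* Writer, socket lock held: initialize first, publish state last. */
    sk->sk_rx_dst = dst;
    sk_state_store(sk, TCP_ESTABLISHED);        /* release */

    /* Lockless reader (diag/proc paths): */
    if (sk_state_load(sk) == TCP_ESTABLISHED)   /* acquire */
            use(sk->sk_rx_dst);                 /* sees the writer's stores */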
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index bc865e244efe..1d22ce9f352e 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -323,7 +323,7 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb,
struct net_device *filter_dev,
int idx)
{
- return -EOPNOTSUPP;
+ return idx;
}
static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index c1c899c3a51b..e289ada6adf6 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -79,7 +79,7 @@ struct vxlanhdr {
};
/* VXLAN header flags. */
-#define VXLAN_HF_RCO BIT(24)
+#define VXLAN_HF_RCO BIT(21)
#define VXLAN_HF_VNI BIT(27)
#define VXLAN_HF_GBP BIT(31)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 4a9c21f9b4ea..d6f6e5006ee9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -548,6 +548,7 @@ struct xfrm_policy {
u16 family;
struct xfrm_sec_ctx *security;
struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH];
+ struct rcu_head rcu;
};
static inline struct net *xp_net(const struct xfrm_policy *xp)
@@ -1141,12 +1142,14 @@ static inline int xfrm6_route_forward(struct sk_buff *skb)
return xfrm_route_forward(skb, AF_INET6);
}
-int __xfrm_sk_clone_policy(struct sock *sk);
+int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk);
-static inline int xfrm_sk_clone_policy(struct sock *sk)
+static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
{
- if (unlikely(sk->sk_policy[0] || sk->sk_policy[1]))
- return __xfrm_sk_clone_policy(sk);
+ sk->sk_policy[0] = NULL;
+ sk->sk_policy[1] = NULL;
+ if (unlikely(osk->sk_policy[0] || osk->sk_policy[1]))
+ return __xfrm_sk_clone_policy(sk, osk);
return 0;
}
@@ -1154,12 +1157,16 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir);
static inline void xfrm_sk_free_policy(struct sock *sk)
{
- if (unlikely(sk->sk_policy[0] != NULL)) {
- xfrm_policy_delete(sk->sk_policy[0], XFRM_POLICY_MAX);
+ struct xfrm_policy *pol;
+
+ pol = rcu_dereference_protected(sk->sk_policy[0], 1);
+ if (unlikely(pol != NULL)) {
+ xfrm_policy_delete(pol, XFRM_POLICY_MAX);
sk->sk_policy[0] = NULL;
}
- if (unlikely(sk->sk_policy[1] != NULL)) {
- xfrm_policy_delete(sk->sk_policy[1], XFRM_POLICY_MAX+1);
+ pol = rcu_dereference_protected(sk->sk_policy[1], 1);
+ if (unlikely(pol != NULL)) {
+ xfrm_policy_delete(pol, XFRM_POLICY_MAX+1);
sk->sk_policy[1] = NULL;
}
}
@@ -1169,7 +1176,7 @@ void xfrm_garbage_collect(struct net *net);
#else
static inline void xfrm_sk_free_policy(struct sock *sk) {}
-static inline int xfrm_sk_clone_policy(struct sock *sk) { return 0; }
+static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; }
static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 188df91d5851..ec9b44dd3d80 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -237,6 +237,8 @@ struct ib_vendor_mad {
u8 data[IB_MGMT_VENDOR_DATA];
};
+#define IB_MGMT_CLASSPORTINFO_ATTR_ID cpu_to_be16(0x0001)
+
struct ib_class_port_info {
u8 base_version;
u8 class_version;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 9a68a19532ba..120da1d7f57e 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1271,6 +1271,7 @@ struct ib_uobject {
int id; /* index into kernel idr */
struct kref ref;
struct rw_semaphore mutex; /* protects .live */
+ struct rcu_head rcu; /* kfree_rcu() overhead */
int live;
};
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index ed527121031d..fcfa3d7f5e7e 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -668,6 +668,9 @@ struct Scsi_Host {
unsigned use_blk_mq:1;
unsigned use_cmd_list:1;
+ /* Host responded with short (<36 bytes) INQUIRY result */
+ unsigned short_inquiry:1;
+
/*
* Optional work queue to be utilized by the transport
*/
diff --git a/include/sound/hda_register.h b/include/sound/hda_register.h
index 2ae8812d7b1a..94dc6a9772e0 100644
--- a/include/sound/hda_register.h
+++ b/include/sound/hda_register.h
@@ -93,6 +93,9 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 };
#define AZX_REG_HSW_EM4 0x100c
#define AZX_REG_HSW_EM5 0x1010
+/* Skylake/Broxton display HD-A controller Extended Mode registers */
+#define AZX_REG_SKL_EM4L 0x1040
+
/* PCI space */
#define AZX_PCIREG_TCSEL 0x44
diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 7855cfe46b69..95a937eafb79 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -398,6 +398,7 @@ int snd_soc_dapm_del_routes(struct snd_soc_dapm_context *dapm,
int snd_soc_dapm_weak_routes(struct snd_soc_dapm_context *dapm,
const struct snd_soc_dapm_route *route, int num);
void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w);
+void snd_soc_dapm_reset_cache(struct snd_soc_dapm_context *dapm);
/* dapm events */
void snd_soc_dapm_stream_event(struct snd_soc_pcm_runtime *rtd, int stream,
diff --git a/include/sound/soc.h b/include/sound/soc.h
index a8b4b9c8b1d2..fb955e69a78e 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1655,7 +1655,7 @@ extern const struct dev_pm_ops snd_soc_pm_ops;
/* Helper functions */
static inline void snd_soc_dapm_mutex_lock(struct snd_soc_dapm_context *dapm)
{
- mutex_lock(&dapm->card->dapm_mutex);
+ mutex_lock_nested(&dapm->card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME);
}
static inline void snd_soc_dapm_mutex_unlock(struct snd_soc_dapm_context *dapm)
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 0a2c74008e53..aabf0aca0171 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -474,7 +474,7 @@ struct se_cmd {
struct completion cmd_wait_comp;
const struct target_core_fabric_ops *se_tfo;
sense_reason_t (*execute_cmd)(struct se_cmd *);
- sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool);
+ sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool, int *);
void *protocol_data;
unsigned char *t_task_cdb;
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 628e6e64c2fb..c2e5d6cb34e3 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -186,6 +186,7 @@ header-y += if_tunnel.h
header-y += if_vlan.h
header-y += if_x25.h
header-y += igmp.h
+header-y += ila.h
header-y += in6.h
header-y += inet_diag.h
header-y += in.h
diff --git a/include/uapi/linux/nfs.h b/include/uapi/linux/nfs.h
index 654bae3f1a38..5e6296160361 100644
--- a/include/uapi/linux/nfs.h
+++ b/include/uapi/linux/nfs.h
@@ -33,17 +33,6 @@
#define NFS_PIPE_DIRNAME "nfs"
-/* NFS ioctls */
-/* Let's follow btrfs lead on CLONE to avoid messing userspace */
-#define NFS_IOC_CLONE _IOW(0x94, 9, int)
-#define NFS_IOC_CLONE_RANGE _IOW(0x94, 13, int)
-
-struct nfs_ioctl_clone_range_args {
- __s64 src_fd;
- __u64 src_off, count;
- __u64 dst_off;
-};
-
/*
* NFS stats. The good thing with these values is that NFSv3 errors are
* a superset of NFSv2 errors (with the exception of NFSERR_WFLUSH which
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 28ccedd000f5..a27222d5b413 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -628,7 +628,7 @@ struct ovs_action_hash {
* @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the
* mask, the corresponding bit in the value is copied to the connection
* tracking mark field in the connection.
- * @OVS_CT_ATTR_LABEL: %OVS_CT_LABELS_LEN value followed by %OVS_CT_LABELS_LEN
+ * @OVS_CT_ATTR_LABELS: %OVS_CT_LABELS_LEN value followed by %OVS_CT_LABELS_LEN
* mask. For each bit set in the mask, the corresponding bit in the value is
* copied to the connection tracking label field in the connection.
* @OVS_CT_ATTR_HELPER: variable length string defining conntrack ALG.
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index d801bb0d9f6d..1afe9623c1a7 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -171,6 +171,9 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */
PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */
+ PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */
+ PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */
+
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};
@@ -192,6 +195,9 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
+
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 751b69f858c8..9fd7b5d8df2f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -39,13 +39,6 @@
#define VFIO_SPAPR_TCE_v2_IOMMU 7
/*
- * The No-IOMMU IOMMU offers no translation or isolation for devices and
- * supports no ioctls outside of VFIO_CHECK_EXTENSION. Use of VFIO's No-IOMMU
- * code will taint the host kernel and should be used with extreme caution.
- */
-#define VFIO_NOIOMMU_IOMMU 8
-
-/*
* The IOCTL interface is designed for extensibility by embedding the
* structure length (argsz) and flags into structures passed between
* kernel and userspace. We therefore use the _IO() macro for these
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index 85dedca3dcfb..eeba75395f7d 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -343,7 +343,6 @@ struct ipu_client_platformdata {
int di;
int dc;
int dp;
- int dmfc;
int dma[2];
};
diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
index 7d28aff605c7..7dc685b4057d 100644
--- a/include/xen/interface/io/ring.h
+++ b/include/xen/interface/io/ring.h
@@ -181,6 +181,20 @@ struct __name##_back_ring { \
#define RING_GET_REQUEST(_r, _idx) \
(&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
+/*
+ * Get a local copy of a request.
+ *
+ * Use this in preference to RING_GET_REQUEST() so all processing is
+ * done on a local copy that cannot be modified by the other end.
+ *
+ * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this
+ * to be ineffective where _req is a struct which consists of only bitfields.
+ */
+#define RING_COPY_REQUEST(_r, _idx, _req) do { \
+ /* Use volatile to force the copy into _req. */ \
+ *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \
+} while (0)
+
#define RING_GET_RESPONSE(_r, _idx) \
(&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
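A backend that validates a request in the shared ring and then re-reads it races with a frontend that can rewrite the slot in between (a TOCTOU bug). RING_COPY_REQUEST() snapshots the slot first so all checks run on private memory. A hedged backend loop (request type and handler are illustrative; memory barriers omitted for brevity):

    struct blkif_request req;
    RING_IDX rc = ring.req_cons, rp = ring.sring->req_prod;

    while (rc != rp) {
            RING_COPY_REQUEST(&ring, rc++, &req);
            /* Validate and process only the local copy. */
            handle_request(&req);
    }
    ring.req_cons = rc;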
diff --git a/init/Kconfig b/init/Kconfig
index c24b6f767bf0..235c7a2c0d20 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -2030,13 +2030,6 @@ config INIT_ALL_POSSIBLE
it was better to provide this option than to break all the archs
and have several arch maintainers pursuing me down dark alleys.
-config STOP_MACHINE
- bool
- default y
- depends on (SMP && MODULE_UNLOAD) || HOTPLUG_CPU
- help
- Need stop_machine() primitive.
-
source "block/Kconfig"
config PREEMPT_NOTIFIERS
diff --git a/init/main.c b/init/main.c
index 9e64d7097f1a..c6ebefafa496 100644
--- a/init/main.c
+++ b/init/main.c
@@ -943,6 +943,8 @@ static int __ref kernel_init(void *unused)
flush_delayed_fput();
+ rcu_end_inkernel_boot();
+
if (ramdisk_execute_command) {
ret = run_init_process(ramdisk_execute_command);
if (!ret)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 3f4c99e06c6b..b0799bced518 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -28,11 +28,17 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
attr->value_size == 0)
return ERR_PTR(-EINVAL);
+ if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
+ /* if value_size is bigger, user space won't be able to
+ * access the elements.
+ */
+ return ERR_PTR(-E2BIG);
+
elem_size = round_up(attr->value_size, 8);
/* check round_up into zero and u32 overflow */
if (elem_size == 0 ||
- attr->max_entries > (U32_MAX - sizeof(*array)) / elem_size)
+ attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size)
return ERR_PTR(-ENOMEM);
array_size = sizeof(*array) + attr->max_entries * elem_size;
@@ -105,7 +111,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
/* all elements already exist */
return -EEXIST;
- memcpy(array->value + array->elem_size * index, value, array->elem_size);
+ memcpy(array->value + array->elem_size * index, value, map->value_size);
return 0;
}
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 19909b22b4f8..34777b3746fa 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -64,12 +64,35 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
*/
goto free_htab;
- err = -ENOMEM;
+ if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) -
+ MAX_BPF_STACK - sizeof(struct htab_elem))
+ /* if value_size is bigger, user space won't be able to
+ * access the elements via bpf syscall. This check also makes
+ * sure that the elem_size doesn't overflow and it's
+ * kmalloc-able later in htab_map_update_elem()
+ */
+ goto free_htab;
+
+ htab->elem_size = sizeof(struct htab_elem) +
+ round_up(htab->map.key_size, 8) +
+ htab->map.value_size;
+
/* prevent zero size kmalloc and check for u32 overflow */
if (htab->n_buckets == 0 ||
htab->n_buckets > U32_MAX / sizeof(struct hlist_head))
goto free_htab;
+ if ((u64) htab->n_buckets * sizeof(struct hlist_head) +
+ (u64) htab->elem_size * htab->map.max_entries >=
+ U32_MAX - PAGE_SIZE)
+ /* make sure page count doesn't overflow */
+ goto free_htab;
+
+ htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) +
+ htab->elem_size * htab->map.max_entries,
+ PAGE_SIZE) >> PAGE_SHIFT;
+
+ err = -ENOMEM;
htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head),
GFP_USER | __GFP_NOWARN);
@@ -85,13 +108,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
raw_spin_lock_init(&htab->lock);
htab->count = 0;
- htab->elem_size = sizeof(struct htab_elem) +
- round_up(htab->map.key_size, 8) +
- htab->map.value_size;
-
- htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) +
- htab->elem_size * htab->map.max_entries,
- PAGE_SIZE) >> PAGE_SHIFT;
return &htab->map;
free_htab:
@@ -222,7 +238,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
WARN_ON_ONCE(!rcu_read_lock_held());
/* allocate new element outside of lock */
- l_new = kmalloc(htab->elem_size, GFP_ATOMIC);
+ l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
if (!l_new)
return -ENOMEM;
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index be6d726e31c9..5a8a797d50b7 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -34,7 +34,7 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt);
break;
case BPF_TYPE_MAP:
- atomic_inc(&((struct bpf_map *)raw)->refcnt);
+ bpf_map_inc(raw, true);
break;
default:
WARN_ON_ONCE(1);
@@ -51,7 +51,7 @@ static void bpf_any_put(void *raw, enum bpf_type type)
bpf_prog_put(raw);
break;
case BPF_TYPE_MAP:
- bpf_map_put(raw);
+ bpf_map_put_with_uref(raw);
break;
default:
WARN_ON_ONCE(1);
@@ -64,7 +64,7 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
void *raw;
*type = BPF_TYPE_MAP;
- raw = bpf_map_get(ufd);
+ raw = bpf_map_get_with_uref(ufd);
if (IS_ERR(raw)) {
*type = BPF_TYPE_PROG;
raw = bpf_prog_get(ufd);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0d3313d02a7e..3b39550d8485 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -82,6 +82,14 @@ static void bpf_map_free_deferred(struct work_struct *work)
map->ops->map_free(map);
}
+static void bpf_map_put_uref(struct bpf_map *map)
+{
+ if (atomic_dec_and_test(&map->usercnt)) {
+ if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
+ bpf_fd_array_map_clear(map);
+ }
+}
+
/* decrement map refcnt and schedule it for freeing via workqueue
* (underlying map implementation ops->map_free() might sleep)
*/
@@ -93,17 +101,15 @@ void bpf_map_put(struct bpf_map *map)
}
}
-static int bpf_map_release(struct inode *inode, struct file *filp)
+void bpf_map_put_with_uref(struct bpf_map *map)
{
- struct bpf_map *map = filp->private_data;
-
- if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
- /* prog_array stores refcnt-ed bpf_prog pointers
- * release them all when user space closes prog_array_fd
- */
- bpf_fd_array_map_clear(map);
-
+ bpf_map_put_uref(map);
bpf_map_put(map);
+}
+
+static int bpf_map_release(struct inode *inode, struct file *filp)
+{
+ bpf_map_put_with_uref(filp->private_data);
return 0;
}
@@ -142,6 +148,7 @@ static int map_create(union bpf_attr *attr)
return PTR_ERR(map);
atomic_set(&map->refcnt, 1);
+ atomic_set(&map->usercnt, 1);
err = bpf_map_charge_memlock(map);
if (err)
@@ -174,7 +181,14 @@ struct bpf_map *__bpf_map_get(struct fd f)
return f.file->private_data;
}
-struct bpf_map *bpf_map_get(u32 ufd)
+void bpf_map_inc(struct bpf_map *map, bool uref)
+{
+ atomic_inc(&map->refcnt);
+ if (uref)
+ atomic_inc(&map->usercnt);
+}
+
+struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
struct fd f = fdget(ufd);
struct bpf_map *map;
@@ -183,7 +197,7 @@ struct bpf_map *bpf_map_get(u32 ufd)
if (IS_ERR(map))
return map;
- atomic_inc(&map->refcnt);
+ bpf_map_inc(map, true);
fdput(f);
return map;
@@ -226,7 +240,7 @@ static int map_lookup_elem(union bpf_attr *attr)
goto free_key;
err = -ENOMEM;
- value = kmalloc(map->value_size, GFP_USER);
+ value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
if (!value)
goto free_key;
@@ -285,7 +299,7 @@ static int map_update_elem(union bpf_attr *attr)
goto free_key;
err = -ENOMEM;
- value = kmalloc(map->value_size, GFP_USER);
+ value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
if (!value)
goto free_key;
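The map now carries two counts: refcnt for any holder (including programs) and usercnt for user-space handles (fds, bpffs pins). A prog_array is cleared when the last user reference drops, which breaks the prog -> map -> prog reference cycle. A hedged sketch of the two flavors:

    /* User-visible handle: take and drop both counts. */
    bpf_map_inc(map, true);         /* refcnt++, usercnt++ */
    /* ... */
    bpf_map_put_with_uref(map);     /* may clear a prog_array, then refcnt-- */

    /* Internal holder, e.g. the verifier pinning a map for a program: */
    bpf_map_inc(map, false);        /* refcnt only */
    /* ... */
    bpf_map_put(map);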
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c6073056badf..a7945d10b378 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2021,8 +2021,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
* will be used by the valid program until it's unloaded
* and all maps are released in free_bpf_prog_info()
*/
- atomic_inc(&map->refcnt);
-
+ bpf_map_inc(map, false);
fdput(f);
next_insn:
insn++;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f1603c153890..470f6536b9e8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -98,6 +98,12 @@ static DEFINE_SPINLOCK(css_set_lock);
static DEFINE_SPINLOCK(cgroup_idr_lock);
/*
+ * Protects cgroup_file->kn for !self csses. It synchronizes notifications
+ * against file removal/re-creation across css hiding.
+ */
+static DEFINE_SPINLOCK(cgroup_file_kn_lock);
+
+/*
* Protects cgroup_subsys->release_agent_path. Modifying it also requires
* cgroup_mutex. Reading requires either cgroup_mutex or this spinlock.
*/
@@ -754,9 +760,11 @@ static void put_css_set_locked(struct css_set *cset)
if (!atomic_dec_and_test(&cset->refcount))
return;
- /* This css_set is dead. unlink it and release cgroup refcounts */
- for_each_subsys(ss, ssid)
+ /* This css_set is dead. unlink it and release cgroup and css refs */
+ for_each_subsys(ss, ssid) {
list_del(&cset->e_cset_node[ssid]);
+ css_put(cset->subsys[ssid]);
+ }
hash_del(&cset->hlist);
css_set_count--;
@@ -1056,9 +1064,13 @@ static struct css_set *find_css_set(struct css_set *old_cset,
key = css_set_hash(cset->subsys);
hash_add(css_set_table, &cset->hlist, key);
- for_each_subsys(ss, ssid)
+ for_each_subsys(ss, ssid) {
+ struct cgroup_subsys_state *css = cset->subsys[ssid];
+
list_add_tail(&cset->e_cset_node[ssid],
- &cset->subsys[ssid]->cgroup->e_csets[ssid]);
+ &css->cgroup->e_csets[ssid]);
+ css_get(css);
+ }
spin_unlock_bh(&css_set_lock);
@@ -1393,6 +1405,16 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
char name[CGROUP_FILE_NAME_MAX];
lockdep_assert_held(&cgroup_mutex);
+
+ if (cft->file_offset) {
+ struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
+ struct cgroup_file *cfile = (void *)css + cft->file_offset;
+
+ spin_lock_irq(&cgroup_file_kn_lock);
+ cfile->kn = NULL;
+ spin_unlock_irq(&cgroup_file_kn_lock);
+ }
+
kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
}
@@ -1856,7 +1878,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
INIT_LIST_HEAD(&cgrp->self.sibling);
INIT_LIST_HEAD(&cgrp->self.children);
- INIT_LIST_HEAD(&cgrp->self.files);
INIT_LIST_HEAD(&cgrp->cset_links);
INIT_LIST_HEAD(&cgrp->pidlists);
mutex_init(&cgrp->pidlist_mutex);
@@ -2216,6 +2237,9 @@ struct cgroup_taskset {
struct list_head src_csets;
struct list_head dst_csets;
+ /* the subsys currently being processed */
+ int ssid;
+
/*
* Fields for cgroup_taskset_*() iteration.
*
@@ -2278,25 +2302,29 @@ static void cgroup_taskset_add(struct task_struct *task,
/**
* cgroup_taskset_first - reset taskset and return the first task
* @tset: taskset of interest
+ * @dst_cssp: output variable for the destination css
*
* @tset iteration is initialized and the first task is returned.
*/
-struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
+ struct cgroup_subsys_state **dst_cssp)
{
tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
tset->cur_task = NULL;
- return cgroup_taskset_next(tset);
+ return cgroup_taskset_next(tset, dst_cssp);
}
/**
* cgroup_taskset_next - iterate to the next task in taskset
* @tset: taskset of interest
+ * @dst_cssp: output variable for the destination css
*
* Return the next task in @tset. Iteration must have been initialized
* with cgroup_taskset_first().
*/
-struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
+ struct cgroup_subsys_state **dst_cssp)
{
struct css_set *cset = tset->cur_cset;
struct task_struct *task = tset->cur_task;
@@ -2311,6 +2339,18 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
if (&task->cg_list != &cset->mg_tasks) {
tset->cur_cset = cset;
tset->cur_task = task;
+
+ /*
+ * This function may be called both before and
+ * after cgroup_taskset_migrate(). The two cases
+ * can be distinguished by looking at whether @cset
+ * has its ->mg_dst_cset set.
+ */
+ if (cset->mg_dst_cset)
+ *dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
+ else
+ *dst_cssp = cset->subsys[tset->ssid];
+
return task;
}
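
cgroup_taskset_next() now reports the destination css for each task through an output parameter, picking mg_dst_cset before migration has completed and the already-migrated cset afterwards. The general shape is an iterator that returns an item while filling in per-item side information; a hypothetical sketch:

#include <stdio.h>

struct item { int id; const char *dst; struct item *next; };

/* Return the next item and report its destination via @dstp,
 * mirroring how cgroup_taskset_next() fills in *dst_cssp. */
static struct item *iter_next(struct item **cur, const char **dstp)
{
        struct item *it = *cur;

        if (!it)
                return NULL;
        *cur = it->next;
        *dstp = it->dst;        /* per-item side information */
        return it;
}

int main(void)
{
        struct item c = { 3, "cgroup-B", NULL };
        struct item b = { 2, "cgroup-A", &c };
        struct item a = { 1, "cgroup-A", &b };
        struct item *cur = &a, *it;
        const char *dst;

        while ((it = iter_next(&cur, &dst)))
                printf("task %d -> %s\n", it->id, dst);
        return 0;
}
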
@@ -2346,7 +2386,8 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
/* check that we can legitimately attach to the cgroup */
for_each_e_css(css, i, dst_cgrp) {
if (css->ss->can_attach) {
- ret = css->ss->can_attach(css, tset);
+ tset->ssid = i;
+ ret = css->ss->can_attach(tset);
if (ret) {
failed_css = css;
goto out_cancel_attach;
@@ -2379,9 +2420,12 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
*/
tset->csets = &tset->dst_csets;
- for_each_e_css(css, i, dst_cgrp)
- if (css->ss->attach)
- css->ss->attach(css, tset);
+ for_each_e_css(css, i, dst_cgrp) {
+ if (css->ss->attach) {
+ tset->ssid = i;
+ css->ss->attach(tset);
+ }
+ }
ret = 0;
goto out_release_tset;
@@ -2390,8 +2434,10 @@ out_cancel_attach:
for_each_e_css(css, i, dst_cgrp) {
if (css == failed_css)
break;
- if (css->ss->cancel_attach)
- css->ss->cancel_attach(css, tset);
+ if (css->ss->cancel_attach) {
+ tset->ssid = i;
+ css->ss->cancel_attach(tset);
+ }
}
out_release_tset:
spin_lock_bh(&css_set_lock);
@@ -3313,9 +3359,9 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
if (cft->file_offset) {
struct cgroup_file *cfile = (void *)css + cft->file_offset;
- kernfs_get(kn);
+ spin_lock_irq(&cgroup_file_kn_lock);
cfile->kn = kn;
- list_add(&cfile->node, &css->files);
+ spin_unlock_irq(&cgroup_file_kn_lock);
}
return 0;
@@ -3553,6 +3599,22 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
}
/**
+ * cgroup_file_notify - generate a file modified event for a cgroup_file
+ * @cfile: target cgroup_file
+ *
+ * @cfile must have been obtained by setting cftype->file_offset.
+ */
+void cgroup_file_notify(struct cgroup_file *cfile)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cgroup_file_kn_lock, flags);
+ if (cfile->kn)
+ kernfs_notify(cfile->kn);
+ spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
+}
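
cgroup_file_notify() replaces the old kernfs_get()/per-css list bookkeeping with a spinlock that simply guards a nullable pointer: cgroup_rm_file() NULLs cfile->kn under the lock, so a racing notifier either sees a live node or skips the event. A pthread-based sketch of that notify-if-present pattern (names are illustrative):

#include <pthread.h>
#include <stdio.h>

struct file_handle {
        pthread_mutex_t lock;
        const char *kn;         /* NULL once the backing file is removed */
};

static void file_notify(struct file_handle *f)
{
        pthread_mutex_lock(&f->lock);
        if (f->kn)                      /* only signal live files */
                printf("notify %s\n", f->kn);
        pthread_mutex_unlock(&f->lock);
}

static void file_remove(struct file_handle *f)
{
        pthread_mutex_lock(&f->lock);
        f->kn = NULL;                   /* racing notifiers now no-op */
        pthread_mutex_unlock(&f->lock);
}

int main(void)
{
        struct file_handle f = { PTHREAD_MUTEX_INITIALIZER, "events" };

        file_notify(&f);        /* delivered */
        file_remove(&f);
        file_notify(&f);        /* silently skipped */
        return 0;
}
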
+
+/**
* cgroup_task_count - count the number of tasks in a cgroup.
* @cgrp: the cgroup in question
*
@@ -4613,13 +4675,9 @@ static void css_free_work_fn(struct work_struct *work)
container_of(work, struct cgroup_subsys_state, destroy_work);
struct cgroup_subsys *ss = css->ss;
struct cgroup *cgrp = css->cgroup;
- struct cgroup_file *cfile;
percpu_ref_exit(&css->refcnt);
- list_for_each_entry(cfile, &css->files, node)
- kernfs_put(cfile->kn);
-
if (ss) {
/* css free path */
int id = css->id;
@@ -4724,7 +4782,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
css->ss = ss;
INIT_LIST_HEAD(&css->sibling);
INIT_LIST_HEAD(&css->children);
- INIT_LIST_HEAD(&css->files);
css->serial_nr = css_serial_nr_next++;
if (cgroup_parent(cgrp)) {
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index f1b30ad5dc6d..2d3df82c54f2 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -155,12 +155,10 @@ static void freezer_css_free(struct cgroup_subsys_state *css)
* @freezer->lock. freezer_attach() makes the new tasks conform to the
* current state and all following state changes can see the new tasks.
*/
-static void freezer_attach(struct cgroup_subsys_state *new_css,
- struct cgroup_taskset *tset)
+static void freezer_attach(struct cgroup_taskset *tset)
{
- struct freezer *freezer = css_freezer(new_css);
struct task_struct *task;
- bool clear_frozen = false;
+ struct cgroup_subsys_state *new_css;
mutex_lock(&freezer_mutex);
@@ -174,22 +172,21 @@ static void freezer_attach(struct cgroup_subsys_state *new_css,
* current state before executing the following - !frozen tasks may
* be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
*/
- cgroup_taskset_for_each(task, tset) {
+ cgroup_taskset_for_each(task, new_css, tset) {
+ struct freezer *freezer = css_freezer(new_css);
+
if (!(freezer->state & CGROUP_FREEZING)) {
__thaw_task(task);
} else {
freeze_task(task);
- freezer->state &= ~CGROUP_FROZEN;
- clear_frozen = true;
+ /* clear FROZEN and propagate upwards */
+ while (freezer && (freezer->state & CGROUP_FROZEN)) {
+ freezer->state &= ~CGROUP_FROZEN;
+ freezer = parent_freezer(freezer);
+ }
}
}
- /* propagate FROZEN clearing upwards */
- while (clear_frozen && (freezer = parent_freezer(freezer))) {
- freezer->state &= ~CGROUP_FROZEN;
- clear_frozen = freezer->state & CGROUP_FREEZING;
- }
-
mutex_unlock(&freezer_mutex);
}
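
Clearing CGROUP_FROZEN now walks the ancestor chain inline for each frozen task instead of deferring a single upward pass, which stays correct now that the tasks in one taskset can land in different destination cgroups. A sketch of the flag-clearing walk over a parent chain (hypothetical structures):

#include <stdio.h>

#define FROZEN 0x1

struct node {
        const char *name;
        unsigned int state;
        struct node *parent;
};

/* Clear FROZEN on @n and every ancestor that still has it set,
 * stopping at the first ancestor that is already thawed. */
static void clear_frozen_upwards(struct node *n)
{
        while (n && (n->state & FROZEN)) {
                n->state &= ~FROZEN;
                n = n->parent;
        }
}

int main(void)
{
        struct node root = { "root", FROZEN, NULL  };
        struct node mid  = { "mid",  FROZEN, &root };
        struct node leaf = { "leaf", FROZEN, &mid  };

        clear_frozen_upwards(&leaf);
        printf("%s=%u %s=%u %s=%u\n", root.name, root.state,
               mid.name, mid.state, leaf.name, leaf.state);
        return 0;
}
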
diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c
index cdd8df4e991c..b50d5a167fda 100644
--- a/kernel/cgroup_pids.c
+++ b/kernel/cgroup_pids.c
@@ -106,7 +106,7 @@ static void pids_uncharge(struct pids_cgroup *pids, int num)
{
struct pids_cgroup *p;
- for (p = pids; p; p = parent_pids(p))
+ for (p = pids; parent_pids(p); p = parent_pids(p))
pids_cancel(p, num);
}
@@ -123,7 +123,7 @@ static void pids_charge(struct pids_cgroup *pids, int num)
{
struct pids_cgroup *p;
- for (p = pids; p; p = parent_pids(p))
+ for (p = pids; parent_pids(p); p = parent_pids(p))
atomic64_add(num, &p->counter);
}
@@ -140,7 +140,7 @@ static int pids_try_charge(struct pids_cgroup *pids, int num)
{
struct pids_cgroup *p, *q;
- for (p = pids; p; p = parent_pids(p)) {
+ for (p = pids; parent_pids(p); p = parent_pids(p)) {
int64_t new = atomic64_add_return(num, &p->counter);
/*
@@ -162,13 +162,13 @@ revert:
return -EAGAIN;
}
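
The loop-condition change from `p` to `parent_pids(p)` makes every charge, uncharge and limit check stop one level short of the hierarchy root, so the root cgroup is never charged (pids.current also gains CFTYPE_NOT_ON_ROOT below). A sketch of hierarchical try-charge with rollback under those rules, using illustrative types:

#include <stdatomic.h>
#include <stdio.h>

struct pids { atomic_long counter; long limit; struct pids *parent; };

/* Charge @num to every level except the root; on overflow, revert
 * the levels already charged and fail. */
static int pids_try_charge(struct pids *pids, long num)
{
        struct pids *p, *q;

        for (p = pids; p->parent; p = p->parent) {
                long new = atomic_fetch_add(&p->counter, num) + num;

                if (new > p->limit)
                        goto revert;
        }
        return 0;

revert:
        atomic_fetch_sub(&p->counter, num);     /* the level that failed */
        for (q = pids; q != p; q = q->parent)
                atomic_fetch_sub(&q->counter, num);
        return -1;
}

int main(void)
{
        struct pids root = { .limit = 0,  .parent = NULL  };    /* never charged */
        struct pids mid  = { .limit = 1,  .parent = &root };
        struct pids leaf = { .limit = 10, .parent = &mid  };

        atomic_init(&root.counter, 0);
        atomic_init(&mid.counter, 0);
        atomic_init(&leaf.counter, 0);
        printf("first: %d\n", pids_try_charge(&leaf, 1));       /* 0 */
        printf("second: %d\n", pids_try_charge(&leaf, 1));      /* -1: mid over limit */
        printf("leaf=%ld mid=%ld\n",
               atomic_load(&leaf.counter), atomic_load(&mid.counter));
        return 0;
}
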
-static int pids_can_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static int pids_can_attach(struct cgroup_taskset *tset)
{
- struct pids_cgroup *pids = css_pids(css);
struct task_struct *task;
+ struct cgroup_subsys_state *dst_css;
- cgroup_taskset_for_each(task, tset) {
+ cgroup_taskset_for_each(task, dst_css, tset) {
+ struct pids_cgroup *pids = css_pids(dst_css);
struct cgroup_subsys_state *old_css;
struct pids_cgroup *old_pids;
@@ -187,13 +187,13 @@ static int pids_can_attach(struct cgroup_subsys_state *css,
return 0;
}
-static void pids_cancel_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void pids_cancel_attach(struct cgroup_taskset *tset)
{
- struct pids_cgroup *pids = css_pids(css);
struct task_struct *task;
+ struct cgroup_subsys_state *dst_css;
- cgroup_taskset_for_each(task, tset) {
+ cgroup_taskset_for_each(task, dst_css, tset) {
+ struct pids_cgroup *pids = css_pids(dst_css);
struct cgroup_subsys_state *old_css;
struct pids_cgroup *old_pids;
@@ -205,65 +205,28 @@ static void pids_cancel_attach(struct cgroup_subsys_state *css,
}
}
+/*
+ * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies
+ * on threadgroup_change_begin() being held by copy_process().
+ */
static int pids_can_fork(struct task_struct *task, void **priv_p)
{
struct cgroup_subsys_state *css;
struct pids_cgroup *pids;
- int err;
- /*
- * Use the "current" task_css for the pids subsystem as the tentative
- * css. It is possible we will charge the wrong hierarchy, in which
- * case we will forcefully revert/reapply the charge on the right
- * hierarchy after it is committed to the task proper.
- */
- css = task_get_css(current, pids_cgrp_id);
+ css = task_css_check(current, pids_cgrp_id, true);
pids = css_pids(css);
-
- err = pids_try_charge(pids, 1);
- if (err)
- goto err_css_put;
-
- *priv_p = css;
- return 0;
-
-err_css_put:
- css_put(css);
- return err;
+ return pids_try_charge(pids, 1);
}
static void pids_cancel_fork(struct task_struct *task, void *priv)
{
- struct cgroup_subsys_state *css = priv;
- struct pids_cgroup *pids = css_pids(css);
-
- pids_uncharge(pids, 1);
- css_put(css);
-}
-
-static void pids_fork(struct task_struct *task, void *priv)
-{
struct cgroup_subsys_state *css;
- struct cgroup_subsys_state *old_css = priv;
struct pids_cgroup *pids;
- struct pids_cgroup *old_pids = css_pids(old_css);
- css = task_get_css(task, pids_cgrp_id);
+ css = task_css_check(current, pids_cgrp_id, true);
pids = css_pids(css);
-
- /*
- * If the association has changed, we have to revert and reapply the
- * charge/uncharge on the wrong hierarchy to the current one. Since
- * the association can only change due to an organisation event, its
- * okay for us to ignore the limit in this case.
- */
- if (pids != old_pids) {
- pids_uncharge(old_pids, 1);
- pids_charge(pids, 1);
- }
-
- css_put(css);
- css_put(old_css);
+ pids_uncharge(pids, 1);
}
static void pids_free(struct task_struct *task)
@@ -335,6 +298,7 @@ static struct cftype pids_files[] = {
{
.name = "current",
.read_s64 = pids_current_read,
+ .flags = CFTYPE_NOT_ON_ROOT,
},
{ } /* terminate */
};
@@ -346,7 +310,6 @@ struct cgroup_subsys pids_cgrp_subsys = {
.cancel_attach = pids_cancel_attach,
.can_fork = pids_can_fork,
.cancel_fork = pids_cancel_fork,
- .fork = pids_fork,
.free = pids_free,
.legacy_cftypes = pids_files,
.dfl_cftypes = pids_files,
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index d8560ee3bab7..9ad37b9e44a7 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -24,7 +24,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>
-struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE;
+DEFINE_STATIC_KEY_FALSE(context_tracking_enabled);
EXPORT_SYMBOL_GPL(context_tracking_enabled);
DEFINE_PER_CPU(struct context_tracking, context_tracking);
@@ -191,7 +191,7 @@ void __init context_tracking_cpu_set(int cpu)
if (!per_cpu(context_tracking.active, cpu)) {
per_cpu(context_tracking.active, cpu) = true;
- static_key_slow_inc(&context_tracking_enabled);
+ static_branch_inc(&context_tracking_enabled);
}
if (initialized)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 10ae73611d80..02a8ea5c9963 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1429,15 +1429,16 @@ static int fmeter_getrate(struct fmeter *fmp)
static struct cpuset *cpuset_attach_old_cs;
/* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
-static int cpuset_can_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static int cpuset_can_attach(struct cgroup_taskset *tset)
{
- struct cpuset *cs = css_cs(css);
+ struct cgroup_subsys_state *css;
+ struct cpuset *cs;
struct task_struct *task;
int ret;
/* used later by cpuset_attach() */
- cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset));
+ cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
+ cs = css_cs(css);
mutex_lock(&cpuset_mutex);
@@ -1447,7 +1448,7 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css,
(cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
goto out_unlock;
- cgroup_taskset_for_each(task, tset) {
+ cgroup_taskset_for_each(task, css, tset) {
ret = task_can_attach(task, cs->cpus_allowed);
if (ret)
goto out_unlock;
@@ -1467,9 +1468,14 @@ out_unlock:
return ret;
}
-static void cpuset_cancel_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void cpuset_cancel_attach(struct cgroup_taskset *tset)
{
+ struct cgroup_subsys_state *css;
+ struct cpuset *cs;
+
+ cgroup_taskset_first(tset, &css);
+ cs = css_cs(css);
+
mutex_lock(&cpuset_mutex);
css_cs(css)->attach_in_progress--;
mutex_unlock(&cpuset_mutex);
@@ -1482,16 +1488,19 @@ static void cpuset_cancel_attach(struct cgroup_subsys_state *css,
*/
static cpumask_var_t cpus_attach;
-static void cpuset_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void cpuset_attach(struct cgroup_taskset *tset)
{
/* static buf protected by cpuset_mutex */
static nodemask_t cpuset_attach_nodemask_to;
struct task_struct *task;
struct task_struct *leader;
- struct cpuset *cs = css_cs(css);
+ struct cgroup_subsys_state *css;
+ struct cpuset *cs;
struct cpuset *oldcs = cpuset_attach_old_cs;
+ cgroup_taskset_first(tset, &css);
+ cs = css_cs(css);
+
mutex_lock(&cpuset_mutex);
/* prepare for attach */
@@ -1502,7 +1511,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
- cgroup_taskset_for_each(task, tset) {
+ cgroup_taskset_for_each(task, css, tset) {
/*
* can_attach beforehand should guarantee that this doesn't
* fail. TODO: have a better way to handle failure here
@@ -1518,7 +1527,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
* sleep and should be moved outside migration path proper.
*/
cpuset_attach_nodemask_to = cs->effective_mems;
- cgroup_taskset_for_each_leader(leader, tset) {
+ cgroup_taskset_for_each_leader(leader, css, tset) {
struct mm_struct *mm = get_task_mm(leader);
if (mm) {
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index d659487254d5..9c418002b8c1 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -3,7 +3,7 @@
*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
* For licensing details see kernel-base/COPYING
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 36babfd20648..bf8244190d0f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3,7 +3,7 @@
*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
* For licensing details see kernel-base/COPYING
@@ -126,6 +126,37 @@ static int cpu_function_call(int cpu, remote_function_f func, void *info)
return data.ret;
}
+static void event_function_call(struct perf_event *event,
+ int (*active)(void *),
+ void (*inactive)(void *),
+ void *data)
+{
+ struct perf_event_context *ctx = event->ctx;
+ struct task_struct *task = ctx->task;
+
+ if (!task) {
+ cpu_function_call(event->cpu, active, data);
+ return;
+ }
+
+again:
+ if (!task_function_call(task, active, data))
+ return;
+
+ raw_spin_lock_irq(&ctx->lock);
+ if (ctx->is_active) {
+ /*
+ * Reload the task pointer, it might have been changed by
+ * a concurrent perf_event_context_sched_out().
+ */
+ task = ctx->task;
+ raw_spin_unlock_irq(&ctx->lock);
+ goto again;
+ }
+ inactive(data);
+ raw_spin_unlock_irq(&ctx->lock);
+}
+
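
event_function_call() factors out the retry loop that the remove, disable, install and period paths below previously open-coded: run @active via a cross-call while the context is live, and fall back to @inactive under ctx->lock once it is not, re-checking is_active to close the race with a concurrent sched-in. A pthread sketch of that active/inactive dispatch (simplified; a pre-checked flag stands in for real IPIs):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct ctx {
        pthread_mutex_t lock;
        bool is_active;         /* "scheduled in" on some CPU */
};

/* Try the remote path while @ctx is active; once we observe it
 * inactive under the lock, run @inactive inline. Mirrors the
 * shape of event_function_call(), minus real cross-calls. */
static void function_call(struct ctx *ctx,
                          int (*active)(void *), void (*inactive)(void *),
                          void *data)
{
again:
        if (ctx->is_active && !active(data))
                return;                         /* remote call succeeded */

        pthread_mutex_lock(&ctx->lock);
        if (ctx->is_active) {
                pthread_mutex_unlock(&ctx->lock);
                goto again;                     /* raced with sched-in; retry */
        }
        inactive(data);                         /* safe: context can't run */
        pthread_mutex_unlock(&ctx->lock);
}

static int do_active(void *d)    { puts("remote path"); return 0; }
static void do_inactive(void *d) { puts("inline path"); }

int main(void)
{
        struct ctx ctx = { PTHREAD_MUTEX_INITIALIZER, false };

        function_call(&ctx, do_active, do_inactive, NULL);      /* inline */
        ctx.is_active = true;
        function_call(&ctx, do_active, do_inactive, NULL);      /* remote */
        return 0;
}
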
#define EVENT_OWNER_KERNEL ((void *) -1)
static bool is_kernel_event(struct perf_event *event)
@@ -435,7 +466,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
if (!is_cgroup_event(event))
return;
- cgrp = perf_cgroup_from_task(current);
+ cgrp = perf_cgroup_from_task(current, event->ctx);
/*
* Do not update time when cgroup is not active
*/
@@ -458,7 +489,7 @@ perf_cgroup_set_timestamp(struct task_struct *task,
if (!task || !ctx->nr_cgroups)
return;
- cgrp = perf_cgroup_from_task(task);
+ cgrp = perf_cgroup_from_task(task, ctx);
info = this_cpu_ptr(cgrp->info);
info->timestamp = ctx->timestamp;
}
@@ -489,7 +520,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
* we reschedule only in the presence of cgroup
* constrained events.
*/
- rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
@@ -522,8 +552,10 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
* set cgrp before ctxsw in to allow
* event_filter_match() to not have to pass
* task around
+ * we pass the cpuctx->ctx to perf_cgroup_from_task()
+ * because cgroup events are only per-cpu
*/
- cpuctx->cgrp = perf_cgroup_from_task(task);
+ cpuctx->cgrp = perf_cgroup_from_task(task, &cpuctx->ctx);
cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
}
perf_pmu_enable(cpuctx->ctx.pmu);
@@ -531,8 +563,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
}
}
- rcu_read_unlock();
-
local_irq_restore(flags);
}
@@ -542,17 +572,20 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
struct perf_cgroup *cgrp1;
struct perf_cgroup *cgrp2 = NULL;
+ rcu_read_lock();
/*
* we come here when we know perf_cgroup_events > 0
+ * we do not need to pass the ctx here because we know
+ * we are holding the rcu lock
*/
- cgrp1 = perf_cgroup_from_task(task);
+ cgrp1 = perf_cgroup_from_task(task, NULL);
/*
* next is NULL when called from perf_event_enable_on_exec()
* that will systematically cause a cgroup_switch()
*/
if (next)
- cgrp2 = perf_cgroup_from_task(next);
+ cgrp2 = perf_cgroup_from_task(next, NULL);
/*
* only schedule out current cgroup events if we know
@@ -561,6 +594,8 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
*/
if (cgrp1 != cgrp2)
perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
+
+ rcu_read_unlock();
}
static inline void perf_cgroup_sched_in(struct task_struct *prev,
@@ -569,13 +604,16 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
struct perf_cgroup *cgrp1;
struct perf_cgroup *cgrp2 = NULL;
+ rcu_read_lock();
/*
* we come here when we know perf_cgroup_events > 0
+ * we do not need to pass the ctx here because we know
+ * we are holding the rcu lock
*/
- cgrp1 = perf_cgroup_from_task(task);
+ cgrp1 = perf_cgroup_from_task(task, NULL);
/* prev can never be NULL */
- cgrp2 = perf_cgroup_from_task(prev);
+ cgrp2 = perf_cgroup_from_task(prev, NULL);
/*
* only need to schedule in cgroup events if we are changing
@@ -584,6 +622,8 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
*/
if (cgrp1 != cgrp2)
perf_cgroup_switch(task, PERF_CGROUP_SWIN);
+
+ rcu_read_unlock();
}
static inline int perf_cgroup_connect(int fd, struct perf_event *event,
@@ -1620,6 +1660,17 @@ struct remove_event {
bool detach_group;
};
+static void ___perf_remove_from_context(void *info)
+{
+ struct remove_event *re = info;
+ struct perf_event *event = re->event;
+ struct perf_event_context *ctx = event->ctx;
+
+ if (re->detach_group)
+ perf_group_detach(event);
+ list_del_event(event, ctx);
+}
+
/*
* Cross CPU call to remove a performance event
*
@@ -1647,7 +1698,6 @@ static int __perf_remove_from_context(void *info)
return 0;
}
-
/*
* Remove the event from a task's (or a CPU's) list of events.
*
@@ -1664,7 +1714,6 @@ static int __perf_remove_from_context(void *info)
static void perf_remove_from_context(struct perf_event *event, bool detach_group)
{
struct perf_event_context *ctx = event->ctx;
- struct task_struct *task = ctx->task;
struct remove_event re = {
.event = event,
.detach_group = detach_group,
@@ -1672,44 +1721,8 @@ static void perf_remove_from_context(struct perf_event *event, bool detach_group
lockdep_assert_held(&ctx->mutex);
- if (!task) {
- /*
- * Per cpu events are removed via an smp call. The removal can
- * fail if the CPU is currently offline, but in that case we
- * already called __perf_remove_from_context from
- * perf_event_exit_cpu.
- */
- cpu_function_call(event->cpu, __perf_remove_from_context, &re);
- return;
- }
-
-retry:
- if (!task_function_call(task, __perf_remove_from_context, &re))
- return;
-
- raw_spin_lock_irq(&ctx->lock);
- /*
- * If we failed to find a running task, but find the context active now
- * that we've acquired the ctx->lock, retry.
- */
- if (ctx->is_active) {
- raw_spin_unlock_irq(&ctx->lock);
- /*
- * Reload the task pointer, it might have been changed by
- * a concurrent perf_event_context_sched_out().
- */
- task = ctx->task;
- goto retry;
- }
-
- /*
- * Since the task isn't running, its safe to remove the event, us
- * holding the ctx->lock ensures the task won't get scheduled in.
- */
- if (detach_group)
- perf_group_detach(event);
- list_del_event(event, ctx);
- raw_spin_unlock_irq(&ctx->lock);
+ event_function_call(event, __perf_remove_from_context,
+ ___perf_remove_from_context, &re);
}
/*
@@ -1753,6 +1766,20 @@ int __perf_event_disable(void *info)
return 0;
}
+void ___perf_event_disable(void *info)
+{
+ struct perf_event *event = info;
+
+ /*
+ * Since we have the lock, this context can't be scheduled
+ * in, so we can change the state safely.
+ */
+ if (event->state == PERF_EVENT_STATE_INACTIVE) {
+ update_group_times(event);
+ event->state = PERF_EVENT_STATE_OFF;
+ }
+}
+
/*
* Disable an event.
*
@@ -1769,43 +1796,16 @@ int __perf_event_disable(void *info)
static void _perf_event_disable(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
- struct task_struct *task = ctx->task;
-
- if (!task) {
- /*
- * Disable the event on the cpu that it's on
- */
- cpu_function_call(event->cpu, __perf_event_disable, event);
- return;
- }
-
-retry:
- if (!task_function_call(task, __perf_event_disable, event))
- return;
raw_spin_lock_irq(&ctx->lock);
- /*
- * If the event is still active, we need to retry the cross-call.
- */
- if (event->state == PERF_EVENT_STATE_ACTIVE) {
+ if (event->state <= PERF_EVENT_STATE_OFF) {
raw_spin_unlock_irq(&ctx->lock);
- /*
- * Reload the task pointer, it might have been changed by
- * a concurrent perf_event_context_sched_out().
- */
- task = ctx->task;
- goto retry;
- }
-
- /*
- * Since we have the lock this context can't be scheduled
- * in, so we can change the state safely.
- */
- if (event->state == PERF_EVENT_STATE_INACTIVE) {
- update_group_times(event);
- event->state = PERF_EVENT_STATE_OFF;
+ return;
}
raw_spin_unlock_irq(&ctx->lock);
+
+ event_function_call(event, __perf_event_disable,
+ ___perf_event_disable, event);
}
/*
@@ -2058,6 +2058,18 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
}
+static void ___perf_install_in_context(void *info)
+{
+ struct perf_event *event = info;
+ struct perf_event_context *ctx = event->ctx;
+
+ /*
+ * Since the task isn't running, it's safe to add the event; our holding
+ * the ctx->lock ensures the task won't get scheduled in.
+ */
+ add_event_to_ctx(event, ctx);
+}
+
/*
* Cross CPU call to install and enable a performance event
*
@@ -2134,48 +2146,14 @@ perf_install_in_context(struct perf_event_context *ctx,
struct perf_event *event,
int cpu)
{
- struct task_struct *task = ctx->task;
-
lockdep_assert_held(&ctx->mutex);
event->ctx = ctx;
if (event->cpu != -1)
event->cpu = cpu;
- if (!task) {
- /*
- * Per cpu events are installed via an smp call and
- * the install is always successful.
- */
- cpu_function_call(cpu, __perf_install_in_context, event);
- return;
- }
-
-retry:
- if (!task_function_call(task, __perf_install_in_context, event))
- return;
-
- raw_spin_lock_irq(&ctx->lock);
- /*
- * If we failed to find a running task, but find the context active now
- * that we've acquired the ctx->lock, retry.
- */
- if (ctx->is_active) {
- raw_spin_unlock_irq(&ctx->lock);
- /*
- * Reload the task pointer, it might have been changed by
- * a concurrent perf_event_context_sched_out().
- */
- task = ctx->task;
- goto retry;
- }
-
- /*
- * Since the task isn't running, its safe to add the event, us holding
- * the ctx->lock ensures the task won't get scheduled in.
- */
- add_event_to_ctx(event, ctx);
- raw_spin_unlock_irq(&ctx->lock);
+ event_function_call(event, __perf_install_in_context,
+ ___perf_install_in_context, event);
}
/*
@@ -2278,6 +2256,11 @@ unlock:
return 0;
}
+void ___perf_event_enable(void *info)
+{
+ __perf_event_mark_enabled((struct perf_event *)info);
+}
+
/*
* Enable an event.
*
@@ -2290,58 +2273,26 @@ unlock:
static void _perf_event_enable(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
- struct task_struct *task = ctx->task;
- if (!task) {
- /*
- * Enable the event on the cpu that it's on
- */
- cpu_function_call(event->cpu, __perf_event_enable, event);
+ raw_spin_lock_irq(&ctx->lock);
+ if (event->state >= PERF_EVENT_STATE_INACTIVE) {
+ raw_spin_unlock_irq(&ctx->lock);
return;
}
- raw_spin_lock_irq(&ctx->lock);
- if (event->state >= PERF_EVENT_STATE_INACTIVE)
- goto out;
-
/*
* If the event is in error state, clear that first.
- * That way, if we see the event in error state below, we
- * know that it has gone back into error state, as distinct
- * from the task having been scheduled away before the
- * cross-call arrived.
+ *
+ * That way, if we see the event in error state below, we know that it
+ * has gone back into error state, as distinct from the task having
+ * been scheduled away before the cross-call arrived.
*/
if (event->state == PERF_EVENT_STATE_ERROR)
event->state = PERF_EVENT_STATE_OFF;
-
-retry:
- if (!ctx->is_active) {
- __perf_event_mark_enabled(event);
- goto out;
- }
-
raw_spin_unlock_irq(&ctx->lock);
- if (!task_function_call(task, __perf_event_enable, event))
- return;
-
- raw_spin_lock_irq(&ctx->lock);
-
- /*
- * If the context is active and the event is still off,
- * we need to retry the cross-call.
- */
- if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) {
- /*
- * task could have been flipped by a concurrent
- * perf_event_context_sched_out()
- */
- task = ctx->task;
- goto retry;
- }
-
-out:
- raw_spin_unlock_irq(&ctx->lock);
+ event_function_call(event, __perf_event_enable,
+ ___perf_event_enable, event);
}
/*
@@ -3145,15 +3096,16 @@ static int event_enable_on_exec(struct perf_event *event,
* Enable all of a task's events that have been marked enable-on-exec.
* This expects task == current.
*/
-static void perf_event_enable_on_exec(struct perf_event_context *ctx)
+static void perf_event_enable_on_exec(int ctxn)
{
- struct perf_event_context *clone_ctx = NULL;
+ struct perf_event_context *ctx, *clone_ctx = NULL;
struct perf_event *event;
unsigned long flags;
int enabled = 0;
int ret;
local_irq_save(flags);
+ ctx = current->perf_event_ctxp[ctxn];
if (!ctx || !ctx->nr_events)
goto out;
@@ -3196,17 +3148,11 @@ out:
void perf_event_exec(void)
{
- struct perf_event_context *ctx;
int ctxn;
rcu_read_lock();
- for_each_task_context_nr(ctxn) {
- ctx = current->perf_event_ctxp[ctxn];
- if (!ctx)
- continue;
-
- perf_event_enable_on_exec(ctx);
- }
+ for_each_task_context_nr(ctxn)
+ perf_event_enable_on_exec(ctxn);
rcu_read_unlock();
}
@@ -4145,6 +4091,22 @@ struct period_event {
u64 value;
};
+static void ___perf_event_period(void *info)
+{
+ struct period_event *pe = info;
+ struct perf_event *event = pe->event;
+ u64 value = pe->value;
+
+ if (event->attr.freq) {
+ event->attr.sample_freq = value;
+ } else {
+ event->attr.sample_period = value;
+ event->hw.sample_period = value;
+ }
+
+ local64_set(&event->hw.period_left, 0);
+}
+
static int __perf_event_period(void *info)
{
struct period_event *pe = info;
@@ -4181,8 +4143,6 @@ static int __perf_event_period(void *info)
static int perf_event_period(struct perf_event *event, u64 __user *arg)
{
struct period_event pe = { .event = event, };
- struct perf_event_context *ctx = event->ctx;
- struct task_struct *task;
u64 value;
if (!is_sampling_event(event))
@@ -4197,27 +4157,10 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
if (event->attr.freq && value > sysctl_perf_event_sample_rate)
return -EINVAL;
- task = ctx->task;
pe.value = value;
- if (!task) {
- cpu_function_call(event->cpu, __perf_event_period, &pe);
- return 0;
- }
-
-retry:
- if (!task_function_call(task, __perf_event_period, &pe))
- return 0;
-
- raw_spin_lock_irq(&ctx->lock);
- if (ctx->is_active) {
- raw_spin_unlock_irq(&ctx->lock);
- task = ctx->task;
- goto retry;
- }
-
- __perf_event_period(&pe);
- raw_spin_unlock_irq(&ctx->lock);
+ event_function_call(event, __perf_event_period,
+ ___perf_event_period, &pe);
return 0;
}
@@ -5667,6 +5610,17 @@ perf_event_aux_ctx(struct perf_event_context *ctx,
}
static void
+perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data,
+ struct perf_event_context *task_ctx)
+{
+ rcu_read_lock();
+ preempt_disable();
+ perf_event_aux_ctx(task_ctx, output, data);
+ preempt_enable();
+ rcu_read_unlock();
+}
+
+static void
perf_event_aux(perf_event_aux_output_cb output, void *data,
struct perf_event_context *task_ctx)
{
@@ -5675,14 +5629,23 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
struct pmu *pmu;
int ctxn;
+ /*
+ * If we have task_ctx != NULL we only notify
+ * the task context itself. The task_ctx is set
+ * only for EXIT events before releasing task
+ * context.
+ */
+ if (task_ctx) {
+ perf_event_aux_task_ctx(output, data, task_ctx);
+ return;
+ }
+
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
if (cpuctx->unique_pmu != pmu)
goto next;
perf_event_aux_ctx(&cpuctx->ctx, output, data);
- if (task_ctx)
- goto next;
ctxn = pmu->task_ctx_nr;
if (ctxn < 0)
goto next;
@@ -5692,12 +5655,6 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
next:
put_cpu_ptr(pmu->pmu_cpu_context);
}
-
- if (task_ctx) {
- preempt_disable();
- perf_event_aux_ctx(task_ctx, output, data);
- preempt_enable();
- }
rcu_read_unlock();
}
@@ -6463,9 +6420,6 @@ struct swevent_htable {
/* Recursion avoidance in each contexts */
int recursion[PERF_NR_CONTEXTS];
-
- /* Keeps track of cpu being initialized/exited */
- bool online;
};
static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
@@ -6723,14 +6677,8 @@ static int perf_swevent_add(struct perf_event *event, int flags)
hwc->state = !(flags & PERF_EF_START);
head = find_swevent_head(swhash, event);
- if (!head) {
- /*
- * We can race with cpu hotplug code. Do not
- * WARN if the cpu just got unplugged.
- */
- WARN_ON_ONCE(swhash->online);
+ if (WARN_ON_ONCE(!head))
return -EINVAL;
- }
hlist_add_head_rcu(&event->hlist_entry, head);
perf_event_update_userpage(event);
@@ -6798,7 +6746,6 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
int err = 0;
mutex_lock(&swhash->hlist_mutex);
-
if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
struct swevent_hlist *hlist;
@@ -8787,10 +8734,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
struct perf_event_context *child_ctx, *clone_ctx = NULL;
unsigned long flags;
- if (likely(!child->perf_event_ctxp[ctxn])) {
- perf_event_task(child, NULL, 0);
+ if (likely(!child->perf_event_ctxp[ctxn]))
return;
- }
local_irq_save(flags);
/*
@@ -8874,6 +8819,14 @@ void perf_event_exit_task(struct task_struct *child)
for_each_task_context_nr(ctxn)
perf_event_exit_task_context(child, ctxn);
+
+ /*
+ * perf_event_exit_task_context() calls perf_event_task() with the
+ * child's task_ctx, which generates EXIT events for
+ * child contexts and sets child->perf_event_ctxp[] to NULL.
+ * At this point we need to send EXIT events to cpu contexts.
+ */
+ perf_event_task(child, NULL, 0);
}
static void perf_free_event(struct perf_event *event,
@@ -9255,7 +9208,6 @@ static void perf_event_init_cpu(int cpu)
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
mutex_lock(&swhash->hlist_mutex);
- swhash->online = true;
if (swhash->hlist_refcount > 0) {
struct swevent_hlist *hlist;
@@ -9297,14 +9249,7 @@ static void perf_event_exit_cpu_context(int cpu)
static void perf_event_exit_cpu(int cpu)
{
- struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
-
perf_event_exit_cpu_context(cpu);
-
- mutex_lock(&swhash->hlist_mutex);
- swhash->online = false;
- swevent_hlist_release(swhash);
- mutex_unlock(&swhash->hlist_mutex);
}
#else
static inline void perf_event_exit_cpu(int cpu) { }
@@ -9452,16 +9397,18 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
static int __perf_cgroup_move(void *info)
{
struct task_struct *task = info;
+ rcu_read_lock();
perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
+ rcu_read_unlock();
return 0;
}
-static void perf_cgroup_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void perf_cgroup_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
+ struct cgroup_subsys_state *css;
- cgroup_taskset_for_each(task, tset)
+ cgroup_taskset_for_each(task, css, tset)
task_function_call(task, __perf_cgroup_move, task);
}
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index b5d1ea79c595..adfdc0536117 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -3,7 +3,7 @@
*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
* For licensing details see kernel-base/COPYING
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 4e5e9798aa0c..7dad84913abf 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -19,7 +19,7 @@
* Authors:
* Srikar Dronamraju
* Jim Keniston
- * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra
*/
#include <linux/kernel.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index f97f2c449f5c..291b08cc817b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -380,6 +380,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
#endif
tsk->splice_pipe = NULL;
tsk->task_frag.page = NULL;
+ tsk->wake_q.next = NULL;
account_kernel_stack(ti, 1);
@@ -1348,9 +1349,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
prev_cputime_init(&p->prev_cputime);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
- seqlock_init(&p->vtime_seqlock);
+ seqcount_init(&p->vtime_seqcount);
p->vtime_snap = 0;
- p->vtime_snap_whence = VTIME_SLEEPING;
+ p->vtime_snap_whence = VTIME_INACTIVE;
#endif
#if defined(SPLIT_RSS_COUNTING)
@@ -1368,8 +1369,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->real_start_time = ktime_get_boot_ns();
p->io_context = NULL;
p->audit_context = NULL;
- if (clone_flags & CLONE_THREAD)
- threadgroup_change_begin(current);
+ threadgroup_change_begin(current);
cgroup_fork(p);
#ifdef CONFIG_NUMA
p->mempolicy = mpol_dup(p->mempolicy);
@@ -1610,8 +1610,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
proc_fork_connector(p);
cgroup_post_fork(p, cgrp_ss_priv);
- if (clone_flags & CLONE_THREAD)
- threadgroup_change_end(current);
+ threadgroup_change_end(current);
perf_event_fork(p);
trace_task_newtask(p, clone_flags);
@@ -1652,8 +1651,7 @@ bad_fork_cleanup_policy:
mpol_put(p->mempolicy);
bad_fork_cleanup_threadgroup_lock:
#endif
- if (clone_flags & CLONE_THREAD)
- threadgroup_change_end(current);
+ threadgroup_change_end(current);
delayacct_tsk_free(p);
bad_fork_cleanup_count:
atomic_dec(&p->cred->user->processes);
diff --git a/kernel/futex.c b/kernel/futex.c
index 684d7549825a..8a310e240cda 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -725,9 +725,12 @@ static struct futex_pi_state * alloc_pi_state(void)
}
/*
+ * Drops a reference to the pi_state object and frees or caches it
+ * when the last reference is gone.
+ *
* Must be called with the hb lock held.
*/
-static void free_pi_state(struct futex_pi_state *pi_state)
+static void put_pi_state(struct futex_pi_state *pi_state)
{
if (!pi_state)
return;
@@ -1706,31 +1709,35 @@ retry_private:
* exist yet, look it up one more time to ensure we have a
* reference to it. If the lock was taken, ret contains the
* vpid of the top waiter task.
+ * If the lock was not taken, we have pi_state and an initial
+ * refcount on it. In case of an error we have nothing.
*/
if (ret > 0) {
WARN_ON(pi_state);
drop_count++;
task_count++;
/*
- * If we acquired the lock, then the user
- * space value of uaddr2 should be vpid. It
- * cannot be changed by the top waiter as it
- * is blocked on hb2 lock if it tries to do
- * so. If something fiddled with it behind our
- * back the pi state lookup might unearth
- * it. So we rather use the known value than
- * rereading and handing potential crap to
- * lookup_pi_state.
+ * If we acquired the lock, then the user space value
+ * of uaddr2 should be vpid. It cannot be changed by
+ * the top waiter as it is blocked on hb2 lock if it
+ * tries to do so. If something fiddled with it behind
+ * our back the pi state lookup might unearth it. So
+ * we'd rather use the known value than rereading and
+ * handing potential crap to lookup_pi_state.
+ *
+ * If that call succeeds then we have pi_state and an
+ * initial refcount on it.
*/
ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
}
switch (ret) {
case 0:
+ /* We hold a reference on the pi state. */
break;
+
+ /* If the above failed, then pi_state is NULL */
case -EFAULT:
- free_pi_state(pi_state);
- pi_state = NULL;
double_unlock_hb(hb1, hb2);
hb_waiters_dec(hb2);
put_futex_key(&key2);
@@ -1746,8 +1753,6 @@ retry_private:
* exit to complete.
* - The user space value changed.
*/
- free_pi_state(pi_state);
- pi_state = NULL;
double_unlock_hb(hb1, hb2);
hb_waiters_dec(hb2);
put_futex_key(&key2);
@@ -1801,30 +1806,58 @@ retry_private:
* of requeue_pi if we couldn't acquire the lock atomically.
*/
if (requeue_pi) {
- /* Prepare the waiter to take the rt_mutex. */
+ /*
+ * Prepare the waiter to take the rt_mutex. Take a
+ * refcount on the pi_state and store the pointer in
+ * the futex_q object of the waiter.
+ */
atomic_inc(&pi_state->refcount);
this->pi_state = pi_state;
ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
this->rt_waiter,
this->task);
if (ret == 1) {
- /* We got the lock. */
+ /*
+ * We got the lock. We do neither drop the
+ * refcount on pi_state nor clear
+ * this->pi_state because the waiter needs the
+ * pi_state for cleaning up the user space
+ * value. It will drop the refcount after
+ * doing so.
+ */
requeue_pi_wake_futex(this, &key2, hb2);
drop_count++;
continue;
} else if (ret) {
- /* -EDEADLK */
+ /*
+ * rt_mutex_start_proxy_lock() detected a
+ * potential deadlock when we tried to queue
+ * that waiter. Drop the pi_state reference
+ * which we took above and remove the pointer
+ * to the state from the waiter's futex_q
+ * object.
+ */
this->pi_state = NULL;
- free_pi_state(pi_state);
- goto out_unlock;
+ put_pi_state(pi_state);
+ /*
+ * We stop queueing more waiters and let user
+ * space deal with the mess.
+ */
+ break;
}
}
requeue_futex(this, hb1, hb2, &key2);
drop_count++;
}
+ /*
+ * We took an extra initial reference to the pi_state either
+ * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We
+ * need to drop it here again.
+ */
+ put_pi_state(pi_state);
+
out_unlock:
- free_pi_state(pi_state);
double_unlock_hb(hb1, hb2);
wake_up_q(&wake_q);
hb_waiters_dec(hb2);
@@ -1973,7 +2006,7 @@ static void unqueue_me_pi(struct futex_q *q)
__unqueue_futex(q);
BUG_ON(!q->pi_state);
- free_pi_state(q->pi_state);
+ put_pi_state(q->pi_state);
q->pi_state = NULL;
spin_unlock(q->lock_ptr);
@@ -2755,6 +2788,11 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (q.pi_state && (q.pi_state->owner != current)) {
spin_lock(q.lock_ptr);
ret = fixup_pi_state_owner(uaddr2, &q, current);
+ /*
+ * Drop the reference to the pi state which
+ * the requeue_pi() code acquired for us.
+ */
+ put_pi_state(q.pi_state);
spin_unlock(q.lock_ptr);
}
} else {
@@ -3046,7 +3084,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
if (op & FUTEX_CLOCK_REALTIME) {
flags |= FLAGS_CLOCKRT;
- if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
+ if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET &&
+ cmd != FUTEX_WAIT_REQUEUE_PI)
return -ENOSYS;
}
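
The rename from free_pi_state() to put_pi_state() matches what the function actually does: drop one reference and only free (or cache) the object when the last one goes away, which is why requeue_pi can hand each waiter its own reference and unconditionally drop the initial one at the end. A minimal get/put sketch with a one-slot cache (illustrative, not the futex code):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct pi_state {
        atomic_int refcount;
};

static struct pi_state *cache;  /* one-slot free-list */

static void get_pi_state(struct pi_state *ps)
{
        atomic_fetch_add(&ps->refcount, 1);
}

/* Drop a reference; free or cache the object when it was the last. */
static void put_pi_state(struct pi_state *ps)
{
        if (!ps || atomic_fetch_sub(&ps->refcount, 1) != 1)
                return;
        if (!cache)
                cache = ps;     /* keep one around for reuse */
        else
                free(ps);
}

int main(void)
{
        struct pi_state *ps = malloc(sizeof(*ps));

        atomic_init(&ps->refcount, 1);  /* initial reference */
        get_pi_state(ps);               /* waiter takes its own ref */
        put_pi_state(ps);               /* drop the initial ref */
        put_pi_state(ps);               /* waiter done: object cached */
        printf("cached: %s\n", cache == ps ? "yes" : "no");
        return 0;
}
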
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 15206453b12a..5797909f4e5b 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -338,7 +338,6 @@ void handle_nested_irq(unsigned int irq)
raw_spin_lock_irq(&desc->lock);
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
action = desc->action;
if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) {
@@ -346,6 +345,7 @@ void handle_nested_irq(unsigned int irq)
goto out_unlock;
}
+ kstat_incr_irqs_this_cpu(desc);
irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
raw_spin_unlock_irq(&desc->lock);
@@ -412,13 +412,13 @@ void handle_simple_irq(struct irq_desc *desc)
goto out_unlock;
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
desc->istate |= IRQS_PENDING;
goto out_unlock;
}
+ kstat_incr_irqs_this_cpu(desc);
handle_irq_event(desc);
out_unlock:
@@ -462,7 +462,6 @@ void handle_level_irq(struct irq_desc *desc)
goto out_unlock;
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
/*
* If its disabled or no action available
@@ -473,6 +472,7 @@ void handle_level_irq(struct irq_desc *desc)
goto out_unlock;
}
+ kstat_incr_irqs_this_cpu(desc);
handle_irq_event(desc);
cond_unmask_irq(desc);
@@ -532,7 +532,6 @@ void handle_fasteoi_irq(struct irq_desc *desc)
goto out;
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
/*
* If its disabled or no action available
@@ -544,6 +543,7 @@ void handle_fasteoi_irq(struct irq_desc *desc)
goto out;
}
+ kstat_incr_irqs_this_cpu(desc);
if (desc->istate & IRQS_ONESHOT)
mask_irq(desc);
@@ -950,6 +950,7 @@ void irq_chip_ack_parent(struct irq_data *data)
data = data->parent_data;
data->chip->irq_ack(data);
}
+EXPORT_SYMBOL_GPL(irq_chip_ack_parent);
/**
* irq_chip_mask_parent - Mask the parent interrupt
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 239e2ae2c947..0409da0bcc33 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -159,6 +159,7 @@ static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
raw_spin_lock_init(&desc->lock);
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+ init_rcu_head(&desc->rcu);
desc_set_defaults(irq, desc, node, owner);
@@ -171,6 +172,15 @@ err_desc:
return NULL;
}
+static void delayed_free_desc(struct rcu_head *rhp)
+{
+ struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);
+
+ free_masks(desc);
+ free_percpu(desc->kstat_irqs);
+ kfree(desc);
+}
+
static void free_desc(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -187,9 +197,12 @@ static void free_desc(unsigned int irq)
delete_irq_desc(irq);
mutex_unlock(&sparse_irq_lock);
- free_masks(desc);
- free_percpu(desc->kstat_irqs);
- kfree(desc);
+ /*
+ * We free the descriptor, masks and stat fields via RCU. That
+ * allows demultiplexed interrupts to do RCU-based management of
+ * the child interrupts.
+ */
+ call_rcu(&desc->rcu, delayed_free_desc);
}
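
Deferring the free through call_rcu() means a reader that looked up the descriptor under rcu_read_lock() can keep using it across removal; the callback recovers the full irq_desc from the embedded rcu_head with container_of(). A userspace analogue of the embedded-head callback, with the grace period replaced by an explicit drain (hypothetical names):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct defer_head {
        struct defer_head *next;
        void (*func)(struct defer_head *);
};

struct desc {
        int irq;
        struct defer_head rcu;  /* embedded callback head */
};

static struct defer_head *pending;

/* Queue @head; in the kernel, call_rcu() runs it after a grace period. */
static void call_deferred(struct defer_head *head,
                          void (*func)(struct defer_head *))
{
        head->func = func;
        head->next = pending;
        pending = head;
}

static void drain_deferred(void)        /* stand-in for the grace period */
{
        while (pending) {
                struct defer_head *h = pending;

                pending = h->next;
                h->func(h);
        }
}

static void delayed_free_desc(struct defer_head *h)
{
        struct desc *d = container_of(h, struct desc, rcu);

        printf("freeing irq %d\n", d->irq);
        free(d);
}

int main(void)
{
        struct desc *d = malloc(sizeof(*d));

        d->irq = 42;
        call_deferred(&d->rcu, delayed_free_desc);
        /* readers could still use @d here */
        drain_deferred();
        return 0;
}
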
static int alloc_descs(unsigned int start, unsigned int cnt, int node,
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 22aa9612ef7c..8cf95de1ab3f 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -60,6 +60,7 @@ struct fwnode_handle *irq_domain_alloc_fwnode(void *data)
fwid->fwnode.type = FWNODE_IRQCHIP;
return &fwid->fwnode;
}
+EXPORT_SYMBOL_GPL(irq_domain_alloc_fwnode);
/**
* irq_domain_free_fwnode - Free a non-OF-backed fwnode_handle
@@ -70,13 +71,14 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
{
struct irqchip_fwid *fwid;
- if (WARN_ON(fwnode->type != FWNODE_IRQCHIP))
+ if (WARN_ON(!is_fwnode_irqchip(fwnode)))
return;
fwid = container_of(fwnode, struct irqchip_fwid, fwnode);
kfree(fwid->name);
kfree(fwid);
}
+EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
/**
* __irq_domain_add() - Allocate a new irq_domain data structure
@@ -1013,6 +1015,7 @@ struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
return NULL;
}
+EXPORT_SYMBOL_GPL(irq_domain_get_irq_data);
/**
* irq_domain_set_hwirq_and_chip - Set hwirq and irqchip of @virq at @domain
@@ -1125,9 +1128,9 @@ static void irq_domain_free_irqs_recursive(struct irq_domain *domain,
}
}
-static int irq_domain_alloc_irqs_recursive(struct irq_domain *domain,
- unsigned int irq_base,
- unsigned int nr_irqs, void *arg)
+int irq_domain_alloc_irqs_recursive(struct irq_domain *domain,
+ unsigned int irq_base,
+ unsigned int nr_irqs, void *arg)
{
int ret = 0;
struct irq_domain *parent = domain->parent;
@@ -1343,6 +1346,7 @@ struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
return (irq_data && irq_data->domain == domain) ? irq_data : NULL;
}
+EXPORT_SYMBOL_GPL(irq_domain_get_irq_data);
/**
* irq_domain_set_info - Set the complete data for a @virq in @domain
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0eebaeef317b..841187239adc 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1434,6 +1434,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
if (!desc)
return NULL;
+ chip_bus_lock(desc);
raw_spin_lock_irqsave(&desc->lock, flags);
/*
@@ -1447,7 +1448,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
if (!action) {
WARN(1, "Trying to free already-free IRQ %d\n", irq);
raw_spin_unlock_irqrestore(&desc->lock, flags);
-
+ chip_bus_sync_unlock(desc);
return NULL;
}
@@ -1475,6 +1476,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
#endif
raw_spin_unlock_irqrestore(&desc->lock, flags);
+ chip_bus_sync_unlock(desc);
unregister_handler_proc(irq, action);
@@ -1553,9 +1555,7 @@ void free_irq(unsigned int irq, void *dev_id)
desc->affinity_notify = NULL;
#endif
- chip_bus_lock(desc);
kfree(__free_irq(irq, dev_id));
- chip_bus_sync_unlock(desc);
}
EXPORT_SYMBOL(free_irq);
@@ -1743,6 +1743,31 @@ out:
}
EXPORT_SYMBOL_GPL(enable_percpu_irq);
+/**
+ * irq_percpu_is_enabled - Check whether the per cpu irq is enabled
+ * @irq: Linux irq number to check for
+ *
+ * Must be called from a non-migratable context. Returns the enable
+ * state of a per cpu interrupt on the current cpu.
+ */
+bool irq_percpu_is_enabled(unsigned int irq)
+{
+ unsigned int cpu = smp_processor_id();
+ struct irq_desc *desc;
+ unsigned long flags;
+ bool is_enabled;
+
+ desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU);
+ if (!desc)
+ return false;
+
+ is_enabled = cpumask_test_cpu(cpu, desc->percpu_enabled);
+ irq_put_desc_unlock(desc, flags);
+
+ return is_enabled;
+}
+EXPORT_SYMBOL_GPL(irq_percpu_is_enabled);
+
void disable_percpu_irq(unsigned int irq)
{
unsigned int cpu = smp_processor_id();
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 6b0c0b74a2a1..15b249e7c673 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -252,6 +252,60 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
&msi_domain_ops, info);
}
+int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *arg)
+{
+ struct msi_domain_info *info = domain->host_data;
+ struct msi_domain_ops *ops = info->ops;
+ int ret;
+
+ ret = ops->msi_check(domain, info, dev);
+ if (ret == 0)
+ ret = ops->msi_prepare(domain, dev, nvec, arg);
+
+ return ret;
+}
+
+int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
+ int virq, int nvec, msi_alloc_info_t *arg)
+{
+ struct msi_domain_info *info = domain->host_data;
+ struct msi_domain_ops *ops = info->ops;
+ struct msi_desc *desc;
+ int ret = 0;
+
+ for_each_msi_entry(desc, dev) {
+ /* Don't even try the multi-MSI brain damage. */
+ if (WARN_ON(!desc->irq || desc->nvec_used != 1)) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (!(desc->irq >= virq && desc->irq < (virq + nvec)))
+ continue;
+
+ ops->set_desc(arg, desc);
+ /* Assumes the domain mutex is held! */
+ ret = irq_domain_alloc_irqs_recursive(domain, virq, 1, arg);
+ if (ret)
+ break;
+
+ irq_set_msi_desc_off(virq, 0, desc);
+ }
+
+ if (ret) {
+ /* Mop up the damage */
+ for_each_msi_entry(desc, dev) {
+ if (!(desc->irq >= virq && desc->irq < (virq + nvec)))
+ continue;
+
+ irq_domain_free_irqs_common(domain, desc->irq, 1);
+ }
+ }
+
+ return ret;
+}
+
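
msi_domain_populate_irqs() follows the usual partial-failure discipline: stop at the first allocation error, then walk the same range again and release only what was actually set up ("mop up the damage"). The same pattern in plain C, with hypothetical resources:

#include <stdio.h>
#include <stdlib.h>

#define N 4

/* Allocate up to N resources; on any failure, release the ones
 * already allocated and report the error. */
static int populate(void *res[N], int fail_at)
{
        int i, ret = 0;

        for (i = 0; i < N; i++) {
                res[i] = (i == fail_at) ? NULL : malloc(16);
                if (!res[i]) {
                        ret = -1;
                        break;
                }
        }

        if (ret) {
                /* mop up the damage */
                while (i-- > 0) {
                        free(res[i]);
                        res[i] = NULL;
                }
        }
        return ret;
}

int main(void)
{
        void *res[N] = { 0 };

        printf("ok run: %d\n", populate(res, -1));
        for (int i = 0; i < N; i++) {
                free(res[i]);
                res[i] = NULL;
        }
        printf("failing run: %d\n", populate(res, 2));
        return 0;
}
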
/**
* msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain
* @domain: The domain to allocate from
@@ -270,9 +324,7 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
struct msi_desc *desc;
int i, ret, virq = -1;
- ret = ops->msi_check(domain, info, dev);
- if (ret == 0)
- ret = ops->msi_prepare(domain, dev, nvec, &arg);
+ ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg);
if (ret)
return ret;
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index cbf9fb899d92..bcf107ce0854 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
*
* Provides a framework for enqueueing and running callbacks from hardirq
* context. The enqueueing is NMI-safe.
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index f7dd15d537f9..05254eeb4b4e 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -2,7 +2,7 @@
* jump label support
*
* Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
- * Copyright (C) 2011 Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2011 Peter Zijlstra
*
*/
#include <linux/memory.h>
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 11b64a63c0f8..c823f3001e12 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -853,7 +853,12 @@ struct kimage *kexec_image;
struct kimage *kexec_crash_image;
int kexec_load_disabled;
-void crash_kexec(struct pt_regs *regs)
+/*
+ * No panic_cpu check version of crash_kexec(). This function is called
+ * only when panic_cpu holds the current CPU number; this is the only CPU
+ * which processes crash_kexec routines.
+ */
+void __crash_kexec(struct pt_regs *regs)
{
/* Take the kexec_mutex here to prevent sys_kexec_load
* running on one cpu from replacing the crash kernel
@@ -876,6 +881,29 @@ void crash_kexec(struct pt_regs *regs)
}
}
+void crash_kexec(struct pt_regs *regs)
+{
+ int old_cpu, this_cpu;
+
+ /*
+ * Only one CPU is allowed to execute the crash_kexec() code as with
+ * panic(). Otherwise parallel calls of panic() and crash_kexec()
+ * may stop each other. To exclude them, we use panic_cpu here too.
+ */
+ this_cpu = raw_smp_processor_id();
+ old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
+ if (old_cpu == PANIC_CPU_INVALID) {
+ /* This is the 1st CPU which comes here, so go ahead. */
+ __crash_kexec(regs);
+
+ /*
+ * Reset panic_cpu to allow another panic()/crash_kexec()
+ * call.
+ */
+ atomic_set(&panic_cpu, PANIC_CPU_INVALID);
+ }
+}
+
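
The panic_cpu handshake is a classic single-winner election: an atomic cmpxchg from PANIC_CPU_INVALID lets exactly one CPU into __crash_kexec() while racing panic()/crash_kexec() callers lose and back off. The same election in C11 atomics, with threads standing in for CPUs:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define PANIC_CPU_INVALID -1

static atomic_int panic_cpu = PANIC_CPU_INVALID;

static void *try_crash(void *arg)
{
        int this_cpu = (int)(long)arg;
        int old = PANIC_CPU_INVALID;

        /* Only the thread that wins the cmpxchg proceeds. */
        if (atomic_compare_exchange_strong(&panic_cpu, &old, this_cpu))
                printf("cpu %d runs crash_kexec()\n", this_cpu);
        return NULL;
}

int main(void)
{
        pthread_t t[4];

        for (long i = 0; i < 4; i++)
                pthread_create(&t[i], NULL, try_crash, (void *)i);
        for (int i = 0; i < 4; i++)
                pthread_join(t[i], NULL);

        /* the winner would reset panic_cpu afterwards */
        atomic_store(&panic_cpu, PANIC_CPU_INVALID);
        return 0;
}
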
size_t crash_get_memory_size(void)
{
size_t size = 0;
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index e83b26464061..152da4a48867 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -20,7 +20,7 @@
#include <linux/capability.h>
#include <linux/compiler.h>
-#include <linux/rcupdate.h> /* rcu_expedited */
+#include <linux/rcupdate.h> /* rcu_expedited and rcu_normal */
#define KERNEL_ATTR_RO(_name) \
static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
@@ -144,11 +144,12 @@ static ssize_t fscaps_show(struct kobject *kobj,
}
KERNEL_ATTR_RO(fscaps);
+#ifndef CONFIG_TINY_RCU
int rcu_expedited;
static ssize_t rcu_expedited_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", rcu_expedited);
+ return sprintf(buf, "%d\n", READ_ONCE(rcu_expedited));
}
static ssize_t rcu_expedited_store(struct kobject *kobj,
struct kobj_attribute *attr,
@@ -161,6 +162,24 @@ static ssize_t rcu_expedited_store(struct kobject *kobj,
}
KERNEL_ATTR_RW(rcu_expedited);
+int rcu_normal;
+static ssize_t rcu_normal_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", READ_ONCE(rcu_normal));
+}
+static ssize_t rcu_normal_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ if (kstrtoint(buf, 0, &rcu_normal))
+ return -EINVAL;
+
+ return count;
+}
+KERNEL_ATTR_RW(rcu_normal);
+#endif /* #ifndef CONFIG_TINY_RCU */
+
/*
* Make /sys/kernel/notes give the raw contents of our kernel .notes section.
*/
@@ -202,7 +221,10 @@ static struct attribute * kernel_attrs[] = {
&kexec_crash_size_attr.attr,
&vmcoreinfo_attr.attr,
#endif
+#ifndef CONFIG_TINY_RCU
&rcu_expedited_attr.attr,
+ &rcu_normal_attr.attr,
+#endif
NULL
};
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 6e5344112419..db545cbcdb89 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -294,6 +294,12 @@ static int klp_write_object_relocations(struct module *pmod,
for (reloc = obj->relocs; reloc->name; reloc++) {
if (!klp_is_module(obj)) {
+
+#if defined(CONFIG_RANDOMIZE_BASE)
+ /* If KASLR has been enabled, adjust old value accordingly */
+ if (kaslr_enabled())
+ reloc->val += kaslr_offset();
+#endif
ret = klp_verify_vmlinux_symbol(reloc->name,
reloc->val);
if (ret)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index deae3907ac1e..60ace56618f6 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -6,7 +6,7 @@
* Started by Ingo Molnar:
*
* Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
*
* this code maps all the lock dependencies as they occur in a live kernel
* and will warn about the following classes of locking bugs:
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index d83d798bef95..dbb61a302548 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -6,7 +6,7 @@
* Started by Ingo Molnar:
*
* Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
*
* Code for /proc/lockdep and /proc/lockdep_stats:
*
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index d092a0c9c2d4..05a37857ab55 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -93,10 +93,12 @@ bool osq_lock(struct optimistic_spin_queue *lock)
node->cpu = curr;
/*
- * ACQUIRE semantics, pairs with corresponding RELEASE
- * in unlock() uncontended, or fastpath.
+ * We need both ACQUIRE (pairs with corresponding RELEASE in
+ * unlock() uncontended, or fastpath) and RELEASE (to publish
+ * the node fields we just initialised) semantics when updating
+ * the lock tail.
*/
- old = atomic_xchg_acquire(&lock->tail, curr);
+ old = atomic_xchg(&lock->tail, curr);
if (old == OSQ_UNLOCKED_VAL)
return true;
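
The osq_lock() change is about ordering, not logic: the xchg on lock->tail must both publish the node fields written just above (RELEASE) and order the subsequent reads (ACQUIRE), so the acquire-only variant was insufficient. In C11 terms that is an exchange with memory_order_acq_rel; a sketch of publishing a node that way:

#include <stdatomic.h>
#include <stdio.h>

struct node { int cpu; _Atomic(struct node *) next; };

static _Atomic(struct node *) tail;

/* Enqueue @n: initialise its fields, then publish it with an
 * acq_rel exchange so another thread that observes the new tail
 * is guaranteed to see the initialised fields. */
static struct node *enqueue(struct node *n, int cpu)
{
        n->cpu = cpu;
        atomic_store_explicit(&n->next, NULL, memory_order_relaxed);
        return atomic_exchange_explicit(&tail, n, memory_order_acq_rel);
}

int main(void)
{
        struct node a, b;

        printf("prev=%p\n", (void *)enqueue(&a, 0));    /* NULL */
        printf("prev=%p\n", (void *)enqueue(&b, 1));    /* &a   */
        return 0;
}
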
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 87e9ce6a63c5..393d1874b9e0 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -14,8 +14,9 @@
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
* (C) Copyright 2013-2014 Red Hat, Inc.
* (C) Copyright 2015 Intel Corp.
+ * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
*
- * Authors: Waiman Long <waiman.long@hp.com>
+ * Authors: Waiman Long <waiman.long@hpe.com>
* Peter Zijlstra <peterz@infradead.org>
*/
@@ -176,7 +177,12 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
struct __qspinlock *l = (void *)lock;
- return (u32)xchg(&l->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
+ /*
+ * Use release semantics to make sure that the MCS node is properly
+ * initialized before changing the tail code.
+ */
+ return (u32)xchg_release(&l->tail,
+ tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}
#else /* _Q_PENDING_BITS == 8 */
@@ -208,7 +214,11 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
for (;;) {
new = (val & _Q_LOCKED_PENDING_MASK) | tail;
- old = atomic_cmpxchg(&lock->val, val, new);
+ /*
+ * Use release semantics to make sure that the MCS node is
+ * properly initialized before changing the tail code.
+ */
+ old = atomic_cmpxchg_release(&lock->val, val, new);
if (old == val)
break;
@@ -238,18 +248,20 @@ static __always_inline void set_locked(struct qspinlock *lock)
*/
static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
+static __always_inline void __pv_wait_node(struct mcs_spinlock *node,
+ struct mcs_spinlock *prev) { }
static __always_inline void __pv_kick_node(struct qspinlock *lock,
struct mcs_spinlock *node) { }
-static __always_inline void __pv_wait_head(struct qspinlock *lock,
- struct mcs_spinlock *node) { }
+static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
+ struct mcs_spinlock *node)
+ { return 0; }
#define pv_enabled() false
#define pv_init_node __pv_init_node
#define pv_wait_node __pv_wait_node
#define pv_kick_node __pv_kick_node
-#define pv_wait_head __pv_wait_head
+#define pv_wait_head_or_lock __pv_wait_head_or_lock
#ifdef CONFIG_PARAVIRT_SPINLOCKS
#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath
@@ -319,7 +331,11 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
if (val == new)
new |= _Q_PENDING_VAL;
- old = atomic_cmpxchg(&lock->val, val, new);
+ /*
+ * Acquire semantic is required here as the function may
+ * return immediately if the lock was free.
+ */
+ old = atomic_cmpxchg_acquire(&lock->val, val, new);
if (old == val)
break;
@@ -382,6 +398,7 @@ queue:
* p,*,* -> n,*,*
*/
old = xchg_tail(lock, tail);
+ next = NULL;
/*
* if there was a previous node; link it and wait until reaching the
@@ -391,8 +408,18 @@ queue:
prev = decode_tail(old);
WRITE_ONCE(prev->next, node);
- pv_wait_node(node);
+ pv_wait_node(node, prev);
arch_mcs_spin_lock_contended(&node->locked);
+
+ /*
+ * While waiting for the MCS lock, the next pointer may have
+ * been set by another lock waiter. We optimistically load
+ * the next pointer & prefetch the cacheline for writing
+ * to reduce latency in the upcoming MCS unlock operation.
+ */
+ next = READ_ONCE(node->next);
+ if (next)
+ prefetchw(next);
}
/*
@@ -406,11 +433,22 @@ queue:
* sequentiality; this is because the set_locked() function below
* does not imply a full barrier.
*
+ * The PV pv_wait_head_or_lock function, if active, will acquire
+ * the lock and return a non-zero value. So we have to skip the
+ * smp_load_acquire() call. As the next PV queue head hasn't been
+ * designated yet, there is no way for the locked value to become
+ * _Q_SLOW_VAL. So both the set_locked() and the
+ * atomic_cmpxchg_relaxed() calls will be safe.
+ *
+ * If PV isn't active, 0 will be returned instead.
*/
- pv_wait_head(lock, node);
- while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
- cpu_relax();
+ if ((val = pv_wait_head_or_lock(lock, node)))
+ goto locked;
+ smp_cond_acquire(!((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK));
+
+locked:
/*
* claim the lock:
*
@@ -422,11 +460,17 @@ queue:
* to grab the lock.
*/
for (;;) {
- if (val != tail) {
+ /* In the PV case we might already have _Q_LOCKED_VAL set */
+ if ((val & _Q_TAIL_MASK) != tail) {
set_locked(lock);
break;
}
- old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
+ /*
+ * The smp_cond_acquire() call above has provided the necessary
+ * acquire semantics required for locking. At most two
+ * iterations of this loop may be run.
+ */
+ old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
if (old == val)
goto release; /* No contention */
@@ -434,10 +478,12 @@ queue:
}
/*
- * contended path; wait for next, release.
+ * contended path; wait for next if not observed yet, release.
*/
- while (!(next = READ_ONCE(node->next)))
- cpu_relax();
+ if (!next) {
+ while (!(next = READ_ONCE(node->next)))
+ cpu_relax();
+ }
arch_mcs_spin_unlock_contended(&next->locked);
pv_kick_node(lock, next);
@@ -462,7 +508,7 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath);
#undef pv_init_node
#undef pv_wait_node
#undef pv_kick_node
-#undef pv_wait_head
+#undef pv_wait_head_or_lock
#undef queued_spin_lock_slowpath
#define queued_spin_lock_slowpath __pv_queued_spin_lock_slowpath
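[Editor's note] Taken together, these hunks implement a release-publish / acquire-observe pairing: xchg_tail()/atomic_cmpxchg_release() publish the initialised MCS node, while the queue head's smp_cond_acquire() (or atomic_cmpxchg_acquire() in the pending path) supplies the acquire side. A condensed C11 sketch of the pairing, with deliberately simplified types and masks:

	#include <stdatomic.h>

	static atomic_uint lock_val;			/* stand-in for qspinlock ->val */

	#define LOCKED_PENDING_MASK	0x0000ffffu	/* _Q_LOCKED_PENDING_MASK */

	/* Publish the tail: RELEASE orders the MCS-node init before the swap. */
	static unsigned int publish_tail(unsigned int tail)
	{
		unsigned int old = atomic_load_explicit(&lock_val, memory_order_relaxed);
		unsigned int newv;

		do {
			newv = (old & LOCKED_PENDING_MASK) | tail;
		} while (!atomic_compare_exchange_weak_explicit(&lock_val, &old, newv,
				memory_order_release, memory_order_relaxed));
		return old;
	}

	/* Queue head: ACQUIRE pairs with the unlocker's release of the lock byte. */
	static void wait_for_owner(void)
	{
		while (atomic_load_explicit(&lock_val, memory_order_acquire) &
		       LOCKED_PENDING_MASK)
			;				/* cpu_relax() in the kernel */
	}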
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index f0450ff4829b..87bb235c3448 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -23,6 +23,20 @@
#define _Q_SLOW_VAL (3U << _Q_LOCKED_OFFSET)
/*
+ * Queue Node Adaptive Spinning
+ *
+ * A queue node vCPU will stop spinning if the vCPU in the previous node is
+ * not running. The one lock stealing attempt allowed at slowpath entry
+ * mitigates the slight slowdown for non-overcommitted guests caused by this
+ * aggressive wait-early mechanism.
+ *
+ * The status of the previous node will be checked at a fixed interval
+ * controlled by PV_PREV_CHECK_MASK. This is to ensure that we won't
+ * pound on the cacheline of the previous node too heavily.
+ */
+#define PV_PREV_CHECK_MASK 0xff
+
+/*
* Queue node uses: vcpu_running & vcpu_halted.
* Queue head uses: vcpu_running & vcpu_hashed.
*/
@@ -41,6 +55,94 @@ struct pv_node {
};
/*
+ * By replacing the regular queued_spin_trylock() with the function below,
+ * it will be called once when a lock waiter enters the PV slowpath before
+ * being queued. By allowing one lock stealing attempt here when the pending
+ * bit is off, it helps to reduce the performance impact of lock waiter
+ * preemption without the drawback of lock starvation.
+ */
+#define queued_spin_trylock(l) pv_queued_spin_steal_lock(l)
+static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock)
+{
+ struct __qspinlock *l = (void *)lock;
+
+ return !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
+ (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
+}
+
+/*
+ * The pending bit is used by the queue head vCPU to indicate that it
+ * is actively spinning on the lock and no lock stealing is allowed.
+ */
+#if _Q_PENDING_BITS == 8
+static __always_inline void set_pending(struct qspinlock *lock)
+{
+ struct __qspinlock *l = (void *)lock;
+
+ WRITE_ONCE(l->pending, 1);
+}
+
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+ struct __qspinlock *l = (void *)lock;
+
+ WRITE_ONCE(l->pending, 0);
+}
+
+/*
+ * The pending bit check in pv_queued_spin_steal_lock() isn't a memory
+ * barrier. Therefore, an atomic cmpxchg() is used to acquire the lock,
+ * which makes sure that the lock is actually obtained.
+ */
+static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+{
+ struct __qspinlock *l = (void *)lock;
+
+ return !READ_ONCE(l->locked) &&
+ (cmpxchg(&l->locked_pending, _Q_PENDING_VAL, _Q_LOCKED_VAL)
+ == _Q_PENDING_VAL);
+}
+#else /* _Q_PENDING_BITS == 8 */
+static __always_inline void set_pending(struct qspinlock *lock)
+{
+ atomic_set_mask(_Q_PENDING_VAL, &lock->val);
+}
+
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+ atomic_clear_mask(_Q_PENDING_VAL, &lock->val);
+}
+
+static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+{
+ int val = atomic_read(&lock->val);
+
+ for (;;) {
+ int old, new;
+
+ if (val & _Q_LOCKED_MASK)
+ break;
+
+ /*
+ * Try to clear pending bit & set locked bit
+ */
+ old = val;
+ new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+ val = atomic_cmpxchg(&lock->val, old, new);
+
+ if (val == old)
+ return 1;
+ }
+ return 0;
+}
+#endif /* _Q_PENDING_BITS == 8 */
+
+/*
+ * Include queued spinlock statistics code
+ */
+#include "qspinlock_stat.h"
+
+/*
* Lock and MCS node addresses hash table for fast lookup
*
* Hashing is done on a per-cacheline basis to minimize the need to access
@@ -100,10 +202,13 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
{
unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
struct pv_hash_entry *he;
+ int hopcnt = 0;
for_each_hash_entry(he, offset, hash) {
+ hopcnt++;
if (!cmpxchg(&he->lock, NULL, lock)) {
WRITE_ONCE(he->node, node);
+ qstat_hop(hopcnt);
return &he->lock;
}
}
@@ -144,6 +249,20 @@ static struct pv_node *pv_unhash(struct qspinlock *lock)
}
/*
+ * Return true when it is time to check the previous node and that node
+ * is not in a running state.
+ */
+static inline bool
+pv_wait_early(struct pv_node *prev, int loop)
+{
+ if ((loop & PV_PREV_CHECK_MASK) != 0)
+ return false;
+
+ return READ_ONCE(prev->state) != vcpu_running;
+}
+
+/*
* Initialize the PV part of the mcs_spinlock node.
*/
static void pv_init_node(struct mcs_spinlock *node)
@@ -161,15 +280,23 @@ static void pv_init_node(struct mcs_spinlock *node)
* pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its
* behalf.
*/
-static void pv_wait_node(struct mcs_spinlock *node)
+static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
{
struct pv_node *pn = (struct pv_node *)node;
+ struct pv_node *pp = (struct pv_node *)prev;
+ int waitcnt = 0;
int loop;
+ bool wait_early;
- for (;;) {
- for (loop = SPIN_THRESHOLD; loop; loop--) {
+ /* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
+ for (;; waitcnt++) {
+ for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
if (READ_ONCE(node->locked))
return;
+ if (pv_wait_early(pp, loop)) {
+ wait_early = true;
+ break;
+ }
cpu_relax();
}
@@ -184,12 +311,17 @@ static void pv_wait_node(struct mcs_spinlock *node)
*/
smp_store_mb(pn->state, vcpu_halted);
- if (!READ_ONCE(node->locked))
+ if (!READ_ONCE(node->locked)) {
+ qstat_inc(qstat_pv_wait_node, true);
+ qstat_inc(qstat_pv_wait_again, waitcnt);
+ qstat_inc(qstat_pv_wait_early, wait_early);
pv_wait(&pn->state, vcpu_halted);
+ }
/*
- * If pv_kick_node() changed us to vcpu_hashed, retain that value
- * so that pv_wait_head() knows to not also try to hash this lock.
+ * If pv_kick_node() changed us to vcpu_hashed, retain that
+ * value so that pv_wait_head_or_lock() knows to not also try
+ * to hash this lock.
*/
cmpxchg(&pn->state, vcpu_halted, vcpu_running);
@@ -200,6 +332,7 @@ static void pv_wait_node(struct mcs_spinlock *node)
* So it is better to spin for a while in the hope that the
* MCS lock will be released soon.
*/
+ qstat_inc(qstat_pv_spurious_wakeup, !READ_ONCE(node->locked));
}
/*
@@ -212,8 +345,9 @@ static void pv_wait_node(struct mcs_spinlock *node)
/*
* Called after setting next->locked = 1 when we're the lock owner.
*
- * Instead of waking the waiters stuck in pv_wait_node() advance their state such
- * that they're waiting in pv_wait_head(), this avoids a wake/sleep cycle.
+ * Instead of waking the waiters stuck in pv_wait_node() advance their state
+ * such that they're waiting in pv_wait_head_or_lock(), this avoids a
+ * wake/sleep cycle.
*/
static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
{
@@ -242,14 +376,19 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
}
/*
- * Wait for l->locked to become clear; halt the vcpu after a short spin.
+ * Wait for l->locked to become clear and acquire the lock;
+ * halt the vcpu after a short spin.
* __pv_queued_spin_unlock() will wake us.
+ *
+ * The current value of the lock will be returned for additional processing.
*/
-static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
+static u32
+pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
struct __qspinlock *l = (void *)lock;
struct qspinlock **lp = NULL;
+ int waitcnt = 0;
int loop;
/*
@@ -259,12 +398,25 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
if (READ_ONCE(pn->state) == vcpu_hashed)
lp = (struct qspinlock **)1;
- for (;;) {
+ for (;; waitcnt++) {
+ /*
+ * Set correct vCPU state to be used by queue node wait-early
+ * mechanism.
+ */
+ WRITE_ONCE(pn->state, vcpu_running);
+
+ /*
+ * Set the pending bit in the active lock spinning loop to
+ * disable lock stealing before attempting to acquire the lock.
+ */
+ set_pending(lock);
for (loop = SPIN_THRESHOLD; loop; loop--) {
- if (!READ_ONCE(l->locked))
- return;
+ if (trylock_clear_pending(lock))
+ goto gotlock;
cpu_relax();
}
+ clear_pending(lock);
+
if (!lp) { /* ONCE */
lp = pv_hash(lock, pn);
@@ -280,51 +432,50 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
*
* Matches the smp_rmb() in __pv_queued_spin_unlock().
*/
- if (!cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL)) {
+ if (xchg(&l->locked, _Q_SLOW_VAL) == 0) {
/*
- * The lock is free and _Q_SLOW_VAL has never
- * been set. Therefore we need to unhash before
- * getting the lock.
+ * The lock was free and now we own the lock.
+ * Change the lock value back to _Q_LOCKED_VAL
+ * and unhash the table.
*/
+ WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
WRITE_ONCE(*lp, NULL);
- return;
+ goto gotlock;
}
}
+ WRITE_ONCE(pn->state, vcpu_halted);
+ qstat_inc(qstat_pv_wait_head, true);
+ qstat_inc(qstat_pv_wait_again, waitcnt);
pv_wait(&l->locked, _Q_SLOW_VAL);
/*
* The unlocker should have freed the lock before kicking the
* CPU. So if the lock is still not free, it is a spurious
- * wakeup and so the vCPU should wait again after spinning for
- * a while.
+ * wakeup or another vCPU has stolen the lock. The current
+ * vCPU should spin again.
*/
+ qstat_inc(qstat_pv_spurious_wakeup, READ_ONCE(l->locked));
}
/*
- * Lock is unlocked now; the caller will acquire it without waiting.
- * As with pv_wait_node() we rely on the caller to do a load-acquire
- * for us.
+ * The cmpxchg() or xchg() call before coming here provides the
+ * acquire semantics for locking. The dummy ORing of _Q_LOCKED_VAL
+ * here is to indicate to the compiler that the value will always
+ * be nonzero, enabling better code optimization.
*/
+gotlock:
+ return (u32)(atomic_read(&lock->val) | _Q_LOCKED_VAL);
}
/*
- * PV version of the unlock function to be used in stead of
- * queued_spin_unlock().
+ * PV versions of the unlock fastpath and slowpath functions to be used
+ * instead of queued_spin_unlock().
*/
-__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
+__visible void
+__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
{
struct __qspinlock *l = (void *)lock;
struct pv_node *node;
- u8 locked;
-
- /*
- * We must not unlock if SLOW, because in that case we must first
- * unhash. Otherwise it would be possible to have multiple @lock
- * entries, which would be BAD.
- */
- locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
- if (likely(locked == _Q_LOCKED_VAL))
- return;
if (unlikely(locked != _Q_SLOW_VAL)) {
WARN(!debug_locks_silent,
@@ -338,7 +489,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
* so we need a barrier to order the read of the node data in
* pv_unhash *after* we've read the lock being _Q_SLOW_VAL.
*
- * Matches the cmpxchg() in pv_wait_head() setting _Q_SLOW_VAL.
+ * Matches the cmpxchg() in pv_wait_head_or_lock() setting _Q_SLOW_VAL.
*/
smp_rmb();
@@ -361,14 +512,35 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
* vCPU is harmless other than the additional latency in completing
* the unlock.
*/
+ qstat_inc(qstat_pv_kick_unlock, true);
pv_kick(node->cpu);
}
+
/*
* Include the architecture specific callee-save thunk of the
* __pv_queued_spin_unlock(). This thunk is put together with
- * __pv_queued_spin_unlock() near the top of the file to make sure
- * that the callee-save thunk and the real unlock function are close
- * to each other sharing consecutive instruction cachelines.
+ * __pv_queued_spin_unlock() to make the callee-save thunk and the real unlock
+ * function close to each other sharing consecutive instruction cachelines.
+ * Alternatively, architecture specific version of __pv_queued_spin_unlock()
+ * can be defined.
*/
#include <asm/qspinlock_paravirt.h>
+#ifndef __pv_queued_spin_unlock
+__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
+{
+ struct __qspinlock *l = (void *)lock;
+ u8 locked;
+
+ /*
+ * We must not unlock if SLOW, because in that case we must first
+ * unhash. Otherwise it would be possible to have multiple @lock
+ * entries, which would be BAD.
+ */
+ locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+ if (likely(locked == _Q_LOCKED_VAL))
+ return;
+
+ __pv_queued_spin_unlock_slowpath(lock, locked);
+}
+#endif /* __pv_queued_spin_unlock */
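[Editor's note] The unlock side of the PV protocol now reads as a decision on the locked byte: 0 means free, _Q_LOCKED_VAL means a plain fastpath release, and _Q_SLOW_VAL means the queue head has hashed itself and gone to sleep, so the unlocker must unhash and kick it. A reduced model of that handoff, illustrative C11 only, with the hash table shrunk to one slot:

	#include <stdatomic.h>

	#define Q_LOCKED_VAL	1U
	#define Q_SLOW_VAL	3U

	static _Atomic unsigned char locked;	/* the qspinlock locked byte */
	static _Atomic int sleeper_cpu;		/* one-slot stand-in for pv_hash() */

	static void kick(int cpu)
	{
		(void)cpu;			/* hypervisor wakeup elided */
	}

	static void pv_unlock(void)
	{
		unsigned char old = Q_LOCKED_VAL;

		/* Fastpath: nobody went to sleep, just release the byte. */
		if (atomic_compare_exchange_strong(&locked, &old, 0))
			return;

		/* Slowpath: old == Q_SLOW_VAL; free the lock, then kick the head. */
		atomic_store(&locked, 0);
		kick(atomic_load(&sleeper_cpu));
	}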
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
new file mode 100644
index 000000000000..640dcecdd1df
--- /dev/null
+++ b/kernel/locking/qspinlock_stat.h
@@ -0,0 +1,300 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Authors: Waiman Long <waiman.long@hpe.com>
+ */
+
+/*
+ * When queued spinlock statistical counters are enabled, the following
+ * debugfs files will be created for reporting the counter values:
+ *
+ * <debugfs>/qlockstat/
+ * pv_hash_hops - average # of hops per hashing operation
+ * pv_kick_unlock - # of vCPU kicks issued at unlock time
+ * pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake
+ * pv_latency_kick - average latency (ns) of vCPU kick operation
+ * pv_latency_wake - average latency (ns) from vCPU kick to wakeup
+ * pv_lock_stealing - # of lock stealing operations
+ * pv_spurious_wakeup - # of spurious wakeups
+ * pv_wait_again - # of vCPU waits that happened after a vCPU kick
+ * pv_wait_early - # of early vCPU waits
+ * pv_wait_head - # of vCPU waits at the queue head
+ * pv_wait_node - # of vCPU waits at a non-head queue node
+ *
+ * Writing to the "reset_counters" file will reset all the above counter
+ * values.
+ *
+ * These statistical counters are implemented as per-cpu variables which are
+ * summed and computed whenever the corresponding debugfs files are read. This
+ * minimizes the added overhead, making the counters usable even in a
+ * production environment.
+ *
+ * There may be a slight difference between pv_kick_wake and pv_kick_unlock.
+ */
+enum qlock_stats {
+ qstat_pv_hash_hops,
+ qstat_pv_kick_unlock,
+ qstat_pv_kick_wake,
+ qstat_pv_latency_kick,
+ qstat_pv_latency_wake,
+ qstat_pv_lock_stealing,
+ qstat_pv_spurious_wakeup,
+ qstat_pv_wait_again,
+ qstat_pv_wait_early,
+ qstat_pv_wait_head,
+ qstat_pv_wait_node,
+ qstat_num, /* Total number of statistical counters */
+ qstat_reset_cnts = qstat_num,
+};
+
+#ifdef CONFIG_QUEUED_LOCK_STAT
+/*
+ * Collect pvqspinlock statistics
+ */
+#include <linux/debugfs.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+
+static const char * const qstat_names[qstat_num + 1] = {
+ [qstat_pv_hash_hops] = "pv_hash_hops",
+ [qstat_pv_kick_unlock] = "pv_kick_unlock",
+ [qstat_pv_kick_wake] = "pv_kick_wake",
+ [qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
+ [qstat_pv_latency_kick] = "pv_latency_kick",
+ [qstat_pv_latency_wake] = "pv_latency_wake",
+ [qstat_pv_lock_stealing] = "pv_lock_stealing",
+ [qstat_pv_wait_again] = "pv_wait_again",
+ [qstat_pv_wait_early] = "pv_wait_early",
+ [qstat_pv_wait_head] = "pv_wait_head",
+ [qstat_pv_wait_node] = "pv_wait_node",
+ [qstat_reset_cnts] = "reset_counters",
+};
+
+/*
+ * Per-cpu counters
+ */
+static DEFINE_PER_CPU(unsigned long, qstats[qstat_num]);
+static DEFINE_PER_CPU(u64, pv_kick_time);
+
+/*
+ * Function to read and return the qlock statistical counter values
+ *
+ * The following counters are handled specially:
+ * 1. qstat_pv_latency_kick
+ * Average kick latency (ns) = pv_latency_kick/pv_kick_unlock
+ * 2. qstat_pv_latency_wake
+ * Average wake latency (ns) = pv_latency_wake/pv_kick_wake
+ * 3. qstat_pv_hash_hops
+ * Average hops/hash = pv_hash_hops/pv_kick_unlock
+ */
+static ssize_t qstat_read(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ char buf[64];
+ int cpu, counter, len;
+ u64 stat = 0, kicks = 0;
+
+ /*
+ * Get the counter ID stored in file->f_inode->i_private
+ */
+ if (!file->f_inode) {
+ WARN_ON_ONCE(1);
+ return -EBADF;
+ }
+ counter = (long)(file->f_inode->i_private);
+
+ if (counter >= qstat_num)
+ return -EBADF;
+
+ for_each_possible_cpu(cpu) {
+ stat += per_cpu(qstats[counter], cpu);
+ /*
+ * Need to sum additional counter for some of them
+ */
+ switch (counter) {
+
+ case qstat_pv_latency_kick:
+ case qstat_pv_hash_hops:
+ kicks += per_cpu(qstats[qstat_pv_kick_unlock], cpu);
+ break;
+
+ case qstat_pv_latency_wake:
+ kicks += per_cpu(qstats[qstat_pv_kick_wake], cpu);
+ break;
+ }
+ }
+
+ if (counter == qstat_pv_hash_hops) {
+ u64 frac = 0;
+
+ if (kicks) {
+ frac = 100ULL * do_div(stat, kicks);
+ frac = DIV_ROUND_CLOSEST_ULL(frac, kicks);
+ }
+
+ /*
+ * Return a X.XX decimal number
+ */
+ len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", stat, frac);
+ } else {
+ /*
+ * Round to the nearest ns
+ */
+ if ((counter == qstat_pv_latency_kick) ||
+ (counter == qstat_pv_latency_wake)) {
+ if (kicks)
+ stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);
+ else
+ stat = 0;
+ }
+ len = snprintf(buf, sizeof(buf) - 1, "%llu\n", stat);
+ }
+
+ return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+}
+
+/*
+ * Function to handle write request
+ *
+ * When counter = reset_cnts, reset all the counter values.
+ * Since the counter updates aren't atomic, the resetting is done twice
+ * to make sure that the counters are very likely to all be cleared.
+ */
+static ssize_t qstat_write(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ int cpu;
+
+ /*
+ * Get the counter ID stored in file->f_inode->i_private
+ */
+ if (!file->f_inode) {
+ WARN_ON_ONCE(1);
+ return -EBADF;
+ }
+ if ((long)(file->f_inode->i_private) != qstat_reset_cnts)
+ return count;
+
+ for_each_possible_cpu(cpu) {
+ int i;
+ unsigned long *ptr = per_cpu_ptr(qstats, cpu);
+
+ for (i = 0 ; i < qstat_num; i++)
+ WRITE_ONCE(ptr[i], 0);
+ for (i = 0 ; i < qstat_num; i++)
+ WRITE_ONCE(ptr[i], 0);
+ }
+ return count;
+}
+
+/*
+ * Debugfs data structures
+ */
+static const struct file_operations fops_qstat = {
+ .read = qstat_read,
+ .write = qstat_write,
+ .llseek = default_llseek,
+};
+
+/*
+ * Initialize debugfs for the qspinlock statistical counters
+ */
+static int __init init_qspinlock_stat(void)
+{
+ struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL);
+ int i;
+
+ if (!d_qstat) {
+ pr_warn("Could not create 'qlockstat' debugfs directory\n");
+ return 0;
+ }
+
+ /*
+ * Create the debugfs files
+ *
+ * As reading from and writing to the stat files can be slow, only
+ * root is allowed to do the read/write to limit impact to system
+ * performance.
+ */
+ for (i = 0; i < qstat_num; i++)
+ debugfs_create_file(qstat_names[i], 0400, d_qstat,
+ (void *)(long)i, &fops_qstat);
+
+ debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
+ (void *)(long)qstat_reset_cnts, &fops_qstat);
+ return 0;
+}
+fs_initcall(init_qspinlock_stat);
+
+/*
+ * Increment the PV qspinlock statistical counters
+ */
+static inline void qstat_inc(enum qlock_stats stat, bool cond)
+{
+ if (cond)
+ this_cpu_inc(qstats[stat]);
+}
+
+/*
+ * PV hash hop count
+ */
+static inline void qstat_hop(int hopcnt)
+{
+ this_cpu_add(qstats[qstat_pv_hash_hops], hopcnt);
+}
+
+/*
+ * Replacement function for pv_kick()
+ */
+static inline void __pv_kick(int cpu)
+{
+ u64 start = sched_clock();
+
+ per_cpu(pv_kick_time, cpu) = start;
+ pv_kick(cpu);
+ this_cpu_add(qstats[qstat_pv_latency_kick], sched_clock() - start);
+}
+
+/*
+ * Replacement function for pv_wait()
+ */
+static inline void __pv_wait(u8 *ptr, u8 val)
+{
+ u64 *pkick_time = this_cpu_ptr(&pv_kick_time);
+
+ *pkick_time = 0;
+ pv_wait(ptr, val);
+ if (*pkick_time) {
+ this_cpu_add(qstats[qstat_pv_latency_wake],
+ sched_clock() - *pkick_time);
+ qstat_inc(qstat_pv_kick_wake, true);
+ }
+}
+
+#define pv_kick(c) __pv_kick(c)
+#define pv_wait(p, v) __pv_wait(p, v)
+
+/*
+ * PV unfair trylock count tracking function
+ */
+static inline int qstat_spin_steal_lock(struct qspinlock *lock)
+{
+ int ret = pv_queued_spin_steal_lock(lock);
+
+ qstat_inc(qstat_pv_lock_stealing, ret);
+ return ret;
+}
+#undef queued_spin_trylock
+#define queued_spin_trylock(l) qstat_spin_steal_lock(l)
+
+#else /* CONFIG_QUEUED_LOCK_STAT */
+
+static inline void qstat_inc(enum qlock_stats stat, bool cond) { }
+static inline void qstat_hop(int hopcnt) { }
+
+#endif /* CONFIG_QUEUED_LOCK_STAT */
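[Editor's note] Since the counters are plain per-CPU sums exposed through debugfs, a user-space reader only has to open the files under <debugfs>/qlockstat/. A hypothetical helper, not part of the patch, assuming debugfs is mounted at /sys/kernel/debug:

	/* Hypothetical user-space reader for the qlockstat debugfs files. */
	#include <stdio.h>

	static long long read_qstat(const char *name)
	{
		char path[128];
		long long val = -1;
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/kernel/debug/qlockstat/%s", name);
		f = fopen(path, "r");		/* files are 0400, run as root */
		if (f) {
			if (fscanf(f, "%lld", &val) != 1)
				val = -1;
			fclose(f);
		}
		return val;
	}

	int main(void)
	{
		printf("pv_kick_unlock:   %lld\n", read_qstat("pv_kick_unlock"));
		printf("pv_lock_stealing: %lld\n", read_qstat("pv_lock_stealing"));
		return 0;
	}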
diff --git a/kernel/module.c b/kernel/module.c
index 8f051a106676..38c7bd5583ff 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3571,6 +3571,12 @@ static int load_module(struct load_info *info, const char __user *uargs,
synchronize_sched();
mutex_unlock(&module_mutex);
free_module:
+ /*
+ * Ftrace needs to clean up what it initialized.
+ * This does nothing if ftrace_module_init() wasn't called,
+ * but it must be called outside of module_mutex.
+ */
+ ftrace_release_mod(mod);
/* Free lock-classes; relies on the preceding sync_rcu() */
lockdep_free_key_range(mod->module_core, mod->core_size);
diff --git a/kernel/panic.c b/kernel/panic.c
index 4579dbb7ed87..b333380c6bb2 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -61,6 +61,17 @@ void __weak panic_smp_self_stop(void)
cpu_relax();
}
+/*
+ * Stop ourselves in NMI context if another CPU has already panicked. Arch code
+ * may override this to prepare for crash dumping, e.g. to save register info.
+ */
+void __weak nmi_panic_self_stop(struct pt_regs *regs)
+{
+ panic_smp_self_stop();
+}
+
+atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
+
/**
* panic - halt the system
* @fmt: The text string to print
@@ -71,17 +82,17 @@ void __weak panic_smp_self_stop(void)
*/
void panic(const char *fmt, ...)
{
- static DEFINE_SPINLOCK(panic_lock);
static char buf[1024];
va_list args;
long i, i_next = 0;
int state = 0;
+ int old_cpu, this_cpu;
/*
* Disable local interrupts. This will prevent panic_smp_self_stop
* from deadlocking the first cpu that invokes the panic, since
* there is nothing to prevent an interrupt handler (that runs
- * after the panic_lock is acquired) from invoking panic again.
+ * after setting panic_cpu) from invoking panic() again.
*/
local_irq_disable();
@@ -94,8 +105,16 @@ void panic(const char *fmt, ...)
* multiple parallel invocations of panic, all other CPUs either
* stop themselves or will wait until they are stopped by the 1st CPU
* with smp_send_stop().
+ *
+ * `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which
+ * comes here, so go ahead.
+ * `old_cpu == this_cpu' means we came from nmi_panic() which sets
+ * panic_cpu to this CPU. In this case, this is also the 1st CPU.
*/
- if (!spin_trylock(&panic_lock))
+ this_cpu = raw_smp_processor_id();
+ old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
+
+ if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
panic_smp_self_stop();
console_verbose();
@@ -117,9 +136,11 @@ void panic(const char *fmt, ...)
* everything else.
* If we want to run this after calling panic_notifiers, pass
* the "crash_kexec_post_notifiers" option to the kernel.
+ *
+ * Bypass the panic_cpu check and call __crash_kexec directly.
*/
if (!crash_kexec_post_notifiers)
- crash_kexec(NULL);
+ __crash_kexec(NULL);
/*
* Note smp_send_stop is the usual smp shutdown function, which
@@ -142,9 +163,11 @@ void panic(const char *fmt, ...)
* panic_notifiers and dumping kmsg before kdump.
* Note: since some panic_notifiers can make crashed kernel
* more unstable, it can increase risks of the kdump failure too.
+ *
+ * Bypass the panic_cpu check and call __crash_kexec directly.
*/
if (crash_kexec_post_notifiers)
- crash_kexec(NULL);
+ __crash_kexec(NULL);
bust_spinlocks(0);
@@ -152,8 +175,11 @@ void panic(const char *fmt, ...)
* We may have ended up stopping the CPU holding the lock (in
* smp_send_stop()) while still having some valuable data in the console
* buffer. Try to acquire the lock then release it regardless of the
- * result. The release will also print the buffers out.
+ * result. The release will also print the buffers out. Lock debugging
+ * should be disabled to avoid reporting a bad unlock balance when
+ * panic() is not being called from an oops.
*/
+ debug_locks_off();
console_trylock();
console_unlock();
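[Editor's note] The spinlock-to-cmpxchg conversion turns panic() entry into a one-shot election: the first CPU to move panic_cpu away from PANIC_CPU_INVALID proceeds, a re-entry from the same CPU (via nmi_panic()) also proceeds, and everyone else parks. A reduced C11 model of that decision, illustrative only:

	#include <stdatomic.h>

	#define PANIC_CPU_INVALID	(-1)

	static atomic_int panic_cpu = PANIC_CPU_INVALID;

	/* Returns 1 if this CPU should run the panic path, 0 if it must park. */
	static int panic_elect(int this_cpu)
	{
		int old = PANIC_CPU_INVALID;

		atomic_compare_exchange_strong(&panic_cpu, &old, this_cpu);
		/* On failure, old now holds whichever CPU won the election. */
		return old == PANIC_CPU_INVALID || old == this_cpu;
	}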
diff --git a/kernel/pid.c b/kernel/pid.c
index ca368793808e..78b3d9f80d44 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -467,7 +467,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
rcu_read_lock();
if (type != PIDTYPE_PID)
task = task->group_leader;
- pid = get_pid(task->pids[type].pid);
+ pid = get_pid(rcu_dereference(task->pids[type].pid));
rcu_read_unlock();
return pid;
}
@@ -528,7 +528,7 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
if (likely(pid_alive(task))) {
if (type != PIDTYPE_PID)
task = task->group_leader;
- nr = pid_nr_ns(task->pids[type].pid, ns);
+ nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
}
rcu_read_unlock();
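[Editor's note] Both pid.c fixes replace a plain load of an RCU-protected pointer with rcu_dereference() inside the existing rcu_read_lock() section, which documents the protection and inserts the dependency ordering some architectures need. A sketch of the general pattern being restored, assuming a hypothetical struct bar with an id field:

	/* Illustrative only: the RCU read-side pattern these hunks restore. */
	struct bar { int id; };

	struct foo {
		struct bar __rcu *barp;
	};

	static int read_bar_id(struct foo *f)
	{
		struct bar *b;
		int id = -1;

		rcu_read_lock();
		b = rcu_dereference(f->barp);	/* ordered load + lockdep check */
		if (b)
			id = b->id;		/* b stays valid until unlock */
		rcu_read_unlock();
		return id;
	}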
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index d89328e260df..d2988d047d66 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -162,6 +162,27 @@ static int rcu_torture_writer_state;
#define RTWS_SYNC 7
#define RTWS_STUTTER 8
#define RTWS_STOPPING 9
+static const char * const rcu_torture_writer_state_names[] = {
+ "RTWS_FIXED_DELAY",
+ "RTWS_DELAY",
+ "RTWS_REPLACE",
+ "RTWS_DEF_FREE",
+ "RTWS_EXP_SYNC",
+ "RTWS_COND_GET",
+ "RTWS_COND_SYNC",
+ "RTWS_SYNC",
+ "RTWS_STUTTER",
+ "RTWS_STOPPING",
+};
+
+static const char *rcu_torture_writer_state_getname(void)
+{
+ unsigned int i = READ_ONCE(rcu_torture_writer_state);
+
+ if (i >= ARRAY_SIZE(rcu_torture_writer_state_names))
+ return "???";
+ return rcu_torture_writer_state_names[i];
+}
#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
#define RCUTORTURE_RUNNABLE_INIT 1
@@ -1307,7 +1328,8 @@ rcu_torture_stats_print(void)
rcutorture_get_gp_data(cur_ops->ttype,
&flags, &gpnum, &completed);
- pr_alert("??? Writer stall state %d g%lu c%lu f%#x\n",
+ pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x\n",
+ rcu_torture_writer_state_getname(),
rcu_torture_writer_state,
gpnum, completed, flags);
show_rcu_gp_kthreads();
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index a63a1ea5a41b..9b9cdd549caa 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -489,7 +489,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
*/
void synchronize_srcu(struct srcu_struct *sp)
{
- __synchronize_srcu(sp, rcu_gp_is_expedited()
+ __synchronize_srcu(sp, (rcu_gp_is_expedited() && !rcu_gp_is_normal())
? SYNCHRONIZE_SRCU_EXP_TRYCOUNT
: SYNCHRONIZE_SRCU_TRYCOUNT);
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f07343b54fe5..e41dd4131f7a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -68,10 +68,6 @@ MODULE_ALIAS("rcutree");
/* Data structures. */
-static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
-static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
-static struct lock_class_key rcu_exp_class[RCU_NUM_LVLS];
-
/*
* In order to export the rcu_state name to the tracing tools, it
* needs to be added in the __tracepoint_string section.
@@ -246,24 +242,17 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
*/
void rcu_sched_qs(void)
{
- unsigned long flags;
-
- if (__this_cpu_read(rcu_sched_data.cpu_no_qs.s)) {
- trace_rcu_grace_period(TPS("rcu_sched"),
- __this_cpu_read(rcu_sched_data.gpnum),
- TPS("cpuqs"));
- __this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);
- if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
- return;
- local_irq_save(flags);
- if (__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)) {
- __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false);
- rcu_report_exp_rdp(&rcu_sched_state,
- this_cpu_ptr(&rcu_sched_data),
- true);
- }
- local_irq_restore(flags);
- }
+ if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s))
+ return;
+ trace_rcu_grace_period(TPS("rcu_sched"),
+ __this_cpu_read(rcu_sched_data.gpnum),
+ TPS("cpuqs"));
+ __this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);
+ if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
+ return;
+ __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false);
+ rcu_report_exp_rdp(&rcu_sched_state,
+ this_cpu_ptr(&rcu_sched_data), true);
}
void rcu_bh_qs(void)
@@ -300,17 +289,16 @@ EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
* We inform the RCU core by emulating a zero-duration dyntick-idle
* period, which we in turn do by incrementing the ->dynticks counter
* by two.
+ *
+ * The caller must have disabled interrupts.
*/
static void rcu_momentary_dyntick_idle(void)
{
- unsigned long flags;
struct rcu_data *rdp;
struct rcu_dynticks *rdtp;
int resched_mask;
struct rcu_state *rsp;
- local_irq_save(flags);
-
/*
* Yes, we can lose flag-setting operations. This is OK, because
* the flag will be set again after some delay.
@@ -340,13 +328,12 @@ static void rcu_momentary_dyntick_idle(void)
smp_mb__after_atomic(); /* Later stuff after QS. */
break;
}
- local_irq_restore(flags);
}
/*
* Note a context switch. This is a quiescent state for RCU-sched,
* and requires special handling for preemptible RCU.
- * The caller must have disabled preemption.
+ * The caller must have disabled interrupts.
*/
void rcu_note_context_switch(void)
{
@@ -376,9 +363,14 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
*/
void rcu_all_qs(void)
{
+ unsigned long flags;
+
barrier(); /* Avoid RCU read-side critical sections leaking down. */
- if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
+ if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) {
+ local_irq_save(flags);
rcu_momentary_dyntick_idle();
+ local_irq_restore(flags);
+ }
this_cpu_inc(rcu_qs_ctr);
barrier(); /* Avoid RCU read-side critical sections leaking up. */
}
@@ -605,25 +597,25 @@ static int rcu_future_needs_gp(struct rcu_state *rsp)
* The caller must have disabled interrupts to prevent races with
* normal callback registry.
*/
-static int
+static bool
cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
{
int i;
if (rcu_gp_in_progress(rsp))
- return 0; /* No, a grace period is already in progress. */
+ return false; /* No, a grace period is already in progress. */
if (rcu_future_needs_gp(rsp))
- return 1; /* Yes, a no-CBs CPU needs one. */
+ return true; /* Yes, a no-CBs CPU needs one. */
if (!rdp->nxttail[RCU_NEXT_TAIL])
- return 0; /* No, this is a no-CBs (or offline) CPU. */
+ return false; /* No, this is a no-CBs (or offline) CPU. */
if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
- return 1; /* Yes, this CPU has newly registered callbacks. */
+ return true; /* Yes, CPU has newly registered callbacks. */
for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
ULONG_CMP_LT(READ_ONCE(rsp->completed),
rdp->nxtcompleted[i]))
- return 1; /* Yes, CBs for future grace period. */
- return 0; /* No grace period needed. */
+ return true; /* Yes, CBs for future grace period. */
+ return false; /* No grace period needed. */
}
/*
@@ -740,7 +732,7 @@ void rcu_user_enter(void)
*
* Exit from an interrupt handler, which might possibly result in entering
* idle mode, in other words, leaving the mode in which read-side critical
- * sections can occur.
+ * sections can occur. The caller must have disabled interrupts.
*
* This code assumes that the idle loop never does anything that might
* result in unbalanced calls to irq_enter() and irq_exit(). If your
@@ -753,11 +745,10 @@ void rcu_user_enter(void)
*/
void rcu_irq_exit(void)
{
- unsigned long flags;
long long oldval;
struct rcu_dynticks *rdtp;
- local_irq_save(flags);
+ RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!");
rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting--;
@@ -768,6 +759,17 @@ void rcu_irq_exit(void)
else
rcu_eqs_enter_common(oldval, true);
rcu_sysidle_enter(1);
+}
+
+/*
+ * Wrapper for rcu_irq_exit() where interrupts are enabled.
+ */
+void rcu_irq_exit_irqson(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ rcu_irq_exit();
local_irq_restore(flags);
}
@@ -865,7 +867,7 @@ void rcu_user_exit(void)
*
* Enter an interrupt handler, which might possibly result in exiting
* idle mode, in other words, entering the mode in which read-side critical
- * sections can occur.
+ * sections can occur. The caller must have disabled interrupts.
*
* Note that the Linux kernel is fully capable of entering an interrupt
* handler that it never exits, for example when doing upcalls to
@@ -881,11 +883,10 @@ void rcu_user_exit(void)
*/
void rcu_irq_enter(void)
{
- unsigned long flags;
struct rcu_dynticks *rdtp;
long long oldval;
- local_irq_save(flags);
+ RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!");
rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting++;
@@ -896,6 +897,17 @@ void rcu_irq_enter(void)
else
rcu_eqs_exit_common(oldval, true);
rcu_sysidle_exit(1);
+}
+
+/*
+ * Wrapper for rcu_irq_enter() where interrupts are enabled.
+ */
+void rcu_irq_enter_irqson(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ rcu_irq_enter();
local_irq_restore(flags);
}
@@ -1187,6 +1199,16 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
}
/*
+ * Convert a ->gp_state value to a character string.
+ */
+static const char *gp_state_getname(short gs)
+{
+ if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names))
+ return "???";
+ return gp_state_names[gs];
+}
+
+/*
* Complain about starvation of grace-period kthread.
*/
static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
@@ -1196,12 +1218,16 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
j = jiffies;
gpa = READ_ONCE(rsp->gp_activity);
- if (j - gpa > 2 * HZ)
- pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x s%d ->state=%#lx\n",
+ if (j - gpa > 2 * HZ) {
+ pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx\n",
rsp->name, j - gpa,
rsp->gpnum, rsp->completed,
- rsp->gp_flags, rsp->gp_state,
- rsp->gp_kthread ? rsp->gp_kthread->state : 0);
+ rsp->gp_flags,
+ gp_state_getname(rsp->gp_state), rsp->gp_state,
+ rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
+ if (rsp->gp_kthread)
+ sched_show_task(rsp->gp_kthread);
+ }
}
/*
@@ -1214,7 +1240,7 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
struct rcu_node *rnp;
rcu_for_each_leaf_node(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->qsmask != 0) {
for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
if (rnp->qsmask & (1UL << cpu))
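[Editor's note] This and the many following hunks replace the open-coded pair raw_spin_lock*(&rnp->lock); smp_mb__after_unlock_lock(); with *_rcu_node() acquisition wrappers, so the grace-period ordering barrier can no longer be forgotten at a call site. The wrappers themselves are not shown in this diff; presumably they are defined in kernel/rcu/tree.h along the lines of:

	/* Sketch of the rcu_node acquisition wrappers the conversions use. */
	#define raw_spin_lock_rcu_node(rnp)					\
	do {									\
		raw_spin_lock(&(rnp)->lock);					\
		smp_mb__after_unlock_lock();	/* GP memory ordering. */	\
	} while (0)

	#define raw_spin_lock_irq_rcu_node(rnp)					\
	do {									\
		raw_spin_lock_irq(&(rnp)->lock);				\
		smp_mb__after_unlock_lock();					\
	} while (0)

	#define raw_spin_lock_irqsave_rcu_node(rnp, flags)			\
	do {									\
		raw_spin_lock_irqsave(&(rnp)->lock, flags);			\
		smp_mb__after_unlock_lock();					\
	} while (0)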
@@ -1237,7 +1263,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
/* Only let one CPU complain about others per time interval. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
delta = jiffies - READ_ONCE(rsp->jiffies_stall);
if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1256,7 +1282,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
rsp->name);
print_cpu_stall_info_begin();
rcu_for_each_leaf_node(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
ndetected += rcu_print_task_stall(rnp);
if (rnp->qsmask != 0) {
for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
@@ -1327,7 +1353,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
rcu_dump_cpu_stacks(rsp);
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))
WRITE_ONCE(rsp->jiffies_stall,
jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
@@ -1534,10 +1560,8 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
* hold it, acquire the root rcu_node structure's lock in order to
* start one (if needed).
*/
- if (rnp != rnp_root) {
- raw_spin_lock(&rnp_root->lock);
- smp_mb__after_unlock_lock();
- }
+ if (rnp != rnp_root)
+ raw_spin_lock_rcu_node(rnp_root);
/*
* Get a new grace-period number. If there really is no grace
@@ -1786,11 +1810,10 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
if ((rdp->gpnum == READ_ONCE(rnp->gpnum) &&
rdp->completed == READ_ONCE(rnp->completed) &&
!unlikely(READ_ONCE(rdp->gpwrap))) || /* w/out lock. */
- !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
+ !raw_spin_trylock_rcu_node(rnp)) { /* irqs already off, so later. */
local_irq_restore(flags);
return;
}
- smp_mb__after_unlock_lock();
needwake = __note_gp_changes(rsp, rnp, rdp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (needwake)
@@ -1805,21 +1828,20 @@ static void rcu_gp_slow(struct rcu_state *rsp, int delay)
}
/*
- * Initialize a new grace period. Return 0 if no grace period required.
+ * Initialize a new grace period. Return false if no grace period required.
*/
-static int rcu_gp_init(struct rcu_state *rsp)
+static bool rcu_gp_init(struct rcu_state *rsp)
{
unsigned long oldmask;
struct rcu_data *rdp;
struct rcu_node *rnp = rcu_get_root(rsp);
WRITE_ONCE(rsp->gp_activity, jiffies);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
if (!READ_ONCE(rsp->gp_flags)) {
/* Spurious wakeup, tell caller to go back to sleep. */
raw_spin_unlock_irq(&rnp->lock);
- return 0;
+ return false;
}
WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */
@@ -1829,7 +1851,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
* Not supposed to be able to happen.
*/
raw_spin_unlock_irq(&rnp->lock);
- return 0;
+ return false;
}
/* Advance to a new grace period and initialize state. */
@@ -1847,8 +1869,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
*/
rcu_for_each_leaf_node(rsp, rnp) {
rcu_gp_slow(rsp, gp_preinit_delay);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
!rnp->wait_blkd_tasks) {
/* Nothing to do on this leaf rcu_node structure. */
@@ -1904,8 +1925,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
rcu_gp_slow(rsp, gp_init_delay);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
rdp = this_cpu_ptr(rsp->rda);
rcu_preempt_check_blocked_tasks(rnp);
rnp->qsmask = rnp->qsmaskinit;
@@ -1923,7 +1943,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
WRITE_ONCE(rsp->gp_activity, jiffies);
}
- return 1;
+ return true;
}
/*
@@ -1973,8 +1993,7 @@ static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time)
}
/* Clear flag to prevent immediate re-entry. */
if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
WRITE_ONCE(rsp->gp_flags,
READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS);
raw_spin_unlock_irq(&rnp->lock);
@@ -1993,8 +2012,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
struct rcu_node *rnp = rcu_get_root(rsp);
WRITE_ONCE(rsp->gp_activity, jiffies);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
gp_duration = jiffies - rsp->gp_start;
if (gp_duration > rsp->gp_max)
rsp->gp_max = gp_duration;
@@ -2019,8 +2037,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
* grace period is recorded in any of the rcu_node structures.
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
WARN_ON_ONCE(rnp->qsmask);
WRITE_ONCE(rnp->completed, rsp->gpnum);
@@ -2035,8 +2052,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
rcu_gp_slow(rsp, gp_cleanup_delay);
}
rnp = rcu_get_root(rsp);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock(); /* Order GP before ->completed update. */
+ raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */
rcu_nocb_gp_set(rnp, nocb);
/* Declare grace period done. */
@@ -2284,8 +2300,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
raw_spin_unlock_irqrestore(&rnp->lock, flags);
rnp_c = rnp;
rnp = rnp->parent;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
oldmask = rnp_c->qsmask;
}
@@ -2332,8 +2347,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
gps = rnp->gpnum;
mask = rnp->grpmask;
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
- raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp_p); /* irqs already disabled. */
rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags);
}
@@ -2355,8 +2369,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
struct rcu_node *rnp;
rnp = rdp->mynode;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if ((rdp->cpu_no_qs.b.norm &&
rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) ||
rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum ||
@@ -2582,8 +2595,7 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
rnp = rnp->parent;
if (!rnp)
break;
- raw_spin_lock(&rnp->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock(); /* GP memory ordering. */
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
rnp->qsmaskinit &= ~mask;
rnp->qsmask &= ~mask;
if (rnp->qsmaskinit) {
@@ -2611,8 +2623,7 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
mask = rdp->grpmask;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock(); /* Enforce GP memory-order guarantee. */
+ raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
rnp->qsmaskinitnext &= ~mask;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
@@ -2809,8 +2820,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
rcu_for_each_leaf_node(rsp, rnp) {
cond_resched_rcu_qs();
mask = 0;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->qsmask == 0) {
if (rcu_state_p == &rcu_sched_state ||
rsp != rcu_state_p ||
@@ -2881,8 +2891,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
/* rnp_old == rcu_get_root(rsp), rnp == NULL. */
/* Reached the root of the rcu_node tree, acquire lock. */
- raw_spin_lock_irqsave(&rnp_old->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp_old, flags);
raw_spin_unlock(&rnp_old->fqslock);
if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
rsp->n_force_qs_lh++;
@@ -2914,7 +2923,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
/* Does this CPU require a not-yet-started grace period? */
local_irq_save(flags);
if (cpu_needs_another_gp(rsp, rdp)) {
- raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
+ raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */
needwake = rcu_start_gp(rsp);
raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
if (needwake)
@@ -3005,8 +3014,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
if (!rcu_gp_in_progress(rsp)) {
struct rcu_node *rnp_root = rcu_get_root(rsp);
- raw_spin_lock(&rnp_root->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp_root);
needwake = rcu_start_gp(rsp);
raw_spin_unlock(&rnp_root->lock);
if (needwake)
@@ -3365,7 +3373,6 @@ static unsigned long rcu_seq_snap(unsigned long *sp)
{
unsigned long s;
- smp_mb(); /* Caller's modifications seen first by other CPUs. */
s = (READ_ONCE(*sp) + 3) & ~0x1;
smp_mb(); /* Above access must not bleed into critical section. */
return s;
@@ -3392,6 +3399,7 @@ static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
}
static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
{
+ smp_mb(); /* Caller's modifications seen first by other CPUs. */
return rcu_seq_snap(&rsp->expedited_sequence);
}
static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
@@ -3426,8 +3434,7 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
* CPUs for the current rcu_node structure up the rcu_node tree.
*/
rcu_for_each_leaf_node(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->expmaskinit == rnp->expmaskinitnext) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
continue; /* No new CPUs, nothing to do. */
@@ -3447,8 +3454,7 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
rnp_up = rnp->parent;
done = false;
while (rnp_up) {
- raw_spin_lock_irqsave(&rnp_up->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp_up, flags);
if (rnp_up->expmaskinit)
done = true;
rnp_up->expmaskinit |= mask;
@@ -3472,8 +3478,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
sync_exp_reset_tree_hotplug(rsp);
rcu_for_each_node_breadth_first(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
WARN_ON_ONCE(rnp->expmask);
rnp->expmask = rnp->expmaskinit;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -3531,8 +3536,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
mask = rnp->grpmask;
raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
rnp = rnp->parent;
- raw_spin_lock(&rnp->lock); /* irqs already disabled */
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
WARN_ON_ONCE(!(rnp->expmask & mask));
rnp->expmask &= ~mask;
}
@@ -3549,8 +3553,7 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
{
unsigned long flags;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
__rcu_report_exp_rnp(rsp, rnp, wake, flags);
}
@@ -3564,8 +3567,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
{
unsigned long flags;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (!(rnp->expmask & mask)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
@@ -3609,7 +3611,7 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
*/
static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
{
- struct rcu_data *rdp;
+ struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
struct rcu_node *rnp0;
struct rcu_node *rnp1 = NULL;
@@ -3623,7 +3625,7 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
if (sync_exp_work_done(rsp, rnp0, NULL,
- &rsp->expedited_workdone0, s))
+ &rdp->expedited_workdone0, s))
return NULL;
return rnp0;
}
@@ -3637,14 +3639,13 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
* can be inexact, as it is just promoting locality and is not
* strictly needed for correctness.
*/
- rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
- if (sync_exp_work_done(rsp, NULL, NULL, &rsp->expedited_workdone1, s))
+ if (sync_exp_work_done(rsp, NULL, NULL, &rdp->expedited_workdone1, s))
return NULL;
mutex_lock(&rdp->exp_funnel_mutex);
rnp0 = rdp->mynode;
for (; rnp0 != NULL; rnp0 = rnp0->parent) {
if (sync_exp_work_done(rsp, rnp1, rdp,
- &rsp->expedited_workdone2, s))
+ &rdp->expedited_workdone2, s))
return NULL;
mutex_lock(&rnp0->exp_funnel_mutex);
if (rnp1)
@@ -3654,7 +3655,7 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
rnp1 = rnp0;
}
if (sync_exp_work_done(rsp, rnp1, rdp,
- &rsp->expedited_workdone3, s))
+ &rdp->expedited_workdone3, s))
return NULL;
return rnp1;
}
@@ -3708,8 +3709,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
sync_exp_reset_tree(rsp);
rcu_for_each_leaf_node(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
/* Each pass checks a CPU for identity, offline, and idle. */
mask_ofl_test = 0;
@@ -3741,24 +3741,22 @@ retry_ipi:
ret = smp_call_function_single(cpu, func, rsp, 0);
if (!ret) {
mask_ofl_ipi &= ~mask;
- } else {
- /* Failed, raced with offline. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
- if (cpu_online(cpu) &&
- (rnp->expmask & mask)) {
- raw_spin_unlock_irqrestore(&rnp->lock,
- flags);
- schedule_timeout_uninterruptible(1);
- if (cpu_online(cpu) &&
- (rnp->expmask & mask))
- goto retry_ipi;
- raw_spin_lock_irqsave(&rnp->lock,
- flags);
- }
- if (!(rnp->expmask & mask))
- mask_ofl_ipi &= ~mask;
+ continue;
+ }
+ /* Failed, raced with offline. */
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ if (cpu_online(cpu) &&
+ (rnp->expmask & mask)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ schedule_timeout_uninterruptible(1);
+ if (cpu_online(cpu) &&
+ (rnp->expmask & mask))
+ goto retry_ipi;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
}
+ if (!(rnp->expmask & mask))
+ mask_ofl_ipi &= ~mask;
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/* Report quiescent states for those that went offline. */
mask_ofl_test |= mask_ofl_ipi;
@@ -3773,6 +3771,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
unsigned long jiffies_stall;
unsigned long jiffies_start;
unsigned long mask;
+ int ndetected;
struct rcu_node *rnp;
struct rcu_node *rnp_root = rcu_get_root(rsp);
int ret;
@@ -3785,7 +3784,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
rsp->expedited_wq,
sync_rcu_preempt_exp_done(rnp_root),
jiffies_stall);
- if (ret > 0)
+ if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root))
return;
if (ret < 0) {
/* Hit a signal, disable CPU stall warnings. */
@@ -3795,14 +3794,16 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
}
pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
rsp->name);
+ ndetected = 0;
rcu_for_each_leaf_node(rsp, rnp) {
- (void)rcu_print_task_exp_stall(rnp);
+ ndetected = rcu_print_task_exp_stall(rnp);
mask = 1;
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
struct rcu_data *rdp;
if (!(rnp->expmask & mask))
continue;
+ ndetected++;
rdp = per_cpu_ptr(rsp->rda, cpu);
pr_cont(" %d-%c%c%c", cpu,
"O."[cpu_online(cpu)],
@@ -3811,8 +3812,23 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
}
mask <<= 1;
}
- pr_cont(" } %lu jiffies s: %lu\n",
- jiffies - jiffies_start, rsp->expedited_sequence);
+ pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
+ jiffies - jiffies_start, rsp->expedited_sequence,
+ rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
+ if (!ndetected) {
+ pr_err("blocking rcu_node structures:");
+ rcu_for_each_node_breadth_first(rsp, rnp) {
+ if (rnp == rnp_root)
+ continue; /* printed unconditionally */
+ if (sync_rcu_preempt_exp_done(rnp))
+ continue;
+ pr_cont(" l=%u:%d-%d:%#lx/%c",
+ rnp->level, rnp->grplo, rnp->grphi,
+ rnp->expmask,
+ ".T"[!!rnp->exp_tasks]);
+ }
+ pr_cont("\n");
+ }
rcu_for_each_leaf_node(rsp, rnp) {
mask = 1;
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
@@ -3847,6 +3863,16 @@ void synchronize_sched_expedited(void)
struct rcu_node *rnp;
struct rcu_state *rsp = &rcu_sched_state;
+ /* If only one CPU, this is automatically a grace period. */
+ if (rcu_blocking_is_gp())
+ return;
+
+ /* If expedited grace periods are prohibited, fall back to normal. */
+ if (rcu_gp_is_normal()) {
+ wait_rcu_gp(call_rcu_sched);
+ return;
+ }
+
/* Take a snapshot of the sequence number. */
s = rcu_exp_gp_seq_snap(rsp);
@@ -4135,7 +4161,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
rnp = rnp->parent;
if (rnp == NULL)
return;
- raw_spin_lock(&rnp->lock); /* Interrupts already disabled. */
+ raw_spin_lock_rcu_node(rnp); /* Interrupts already disabled. */
rnp->qsmaskinit |= mask;
raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
}
@@ -4152,7 +4178,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
struct rcu_node *rnp = rcu_get_root(rsp);
/* Set up local state, ensuring consistent view of global state. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
@@ -4179,7 +4205,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
struct rcu_node *rnp = rcu_get_root(rsp);
/* Set up local state, ensuring consistent view of global state. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
rdp->qlen_last_fqs_check = 0;
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->blimit = blimit;
@@ -4198,8 +4224,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
*/
rnp = rdp->mynode;
mask = rdp->grpmask;
- raw_spin_lock(&rnp->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
rnp->qsmaskinitnext |= mask;
rnp->expmaskinitnext |= mask;
if (!rdp->beenonline)
@@ -4327,14 +4352,14 @@ static int __init rcu_spawn_gp_kthread(void)
t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name);
BUG_ON(IS_ERR(t));
rnp = rcu_get_root(rsp);
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
rsp->gp_kthread = t;
if (kthread_prio) {
sp.sched_priority = kthread_prio;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
}
- wake_up_process(t);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ wake_up_process(t);
}
rcu_spawn_nocb_kthreads();
rcu_spawn_boost_kthreads();
@@ -4385,12 +4410,14 @@ static void __init rcu_init_levelspread(int *levelspread, const int *levelcnt)
/*
* Helper function for rcu_init() that initializes one rcu_state structure.
*/
-static void __init rcu_init_one(struct rcu_state *rsp,
- struct rcu_data __percpu *rda)
+static void __init rcu_init_one(struct rcu_state *rsp)
{
static const char * const buf[] = RCU_NODE_NAME_INIT;
static const char * const fqs[] = RCU_FQS_NAME_INIT;
static const char * const exp[] = RCU_EXP_NAME_INIT;
+ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
+ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
+ static struct lock_class_key rcu_exp_class[RCU_NUM_LVLS];
static u8 fl_mask = 0x1;
int levelcnt[RCU_NUM_LVLS]; /* # nodes in each level. */
@@ -4576,8 +4603,8 @@ void __init rcu_init(void)
rcu_bootup_announce();
rcu_init_geometry();
- rcu_init_one(&rcu_bh_state, &rcu_bh_data);
- rcu_init_one(&rcu_sched_state, &rcu_sched_data);
+ rcu_init_one(&rcu_bh_state);
+ rcu_init_one(&rcu_sched_state);
if (dump_tree)
rcu_dump_rcu_node_tree(&rcu_sched_state);
__rcu_init_preempt();
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 9fb4e238d4dc..83360b4f4352 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -178,6 +178,8 @@ struct rcu_node {
/* beginning of each expedited GP. */
unsigned long expmaskinitnext;
/* Online CPUs for next expedited GP. */
+ /* Any CPU that has ever been online will */
+ /* have its bit set. */
unsigned long grpmask; /* Mask to apply to parent qsmask. */
/* Only one bit will be set in this mask. */
int grplo; /* lowest-numbered CPU or group here. */
@@ -384,6 +386,10 @@ struct rcu_data {
struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
struct mutex exp_funnel_mutex;
+ atomic_long_t expedited_workdone0; /* # done by others #0. */
+ atomic_long_t expedited_workdone1; /* # done by others #1. */
+ atomic_long_t expedited_workdone2; /* # done by others #2. */
+ atomic_long_t expedited_workdone3; /* # done by others #3. */
/* 7) Callback offloading. */
#ifdef CONFIG_RCU_NOCB_CPU
@@ -498,10 +504,6 @@ struct rcu_state {
/* End of fields guarded by barrier_mutex. */
unsigned long expedited_sequence; /* Take a ticket. */
- atomic_long_t expedited_workdone0; /* # done by others #0. */
- atomic_long_t expedited_workdone1; /* # done by others #1. */
- atomic_long_t expedited_workdone2; /* # done by others #2. */
- atomic_long_t expedited_workdone3; /* # done by others #3. */
atomic_long_t expedited_normal; /* # fallbacks to normal. */
atomic_t expedited_need_qs; /* # CPUs left to check in. */
wait_queue_head_t expedited_wq; /* Wait for check-ins. */
@@ -545,6 +547,18 @@ struct rcu_state {
#define RCU_GP_CLEANUP 5 /* Grace-period cleanup started. */
#define RCU_GP_CLEANED 6 /* Grace-period cleanup complete. */
+#ifndef RCU_TREE_NONCORE
+static const char * const gp_state_names[] = {
+ "RCU_GP_IDLE",
+ "RCU_GP_WAIT_GPS",
+ "RCU_GP_DONE_GPS",
+ "RCU_GP_WAIT_FQS",
+ "RCU_GP_DOING_FQS",
+ "RCU_GP_CLEANUP",
+ "RCU_GP_CLEANED",
+};
+#endif /* #ifndef RCU_TREE_NONCORE */
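A hedged sketch of how such a name table is typically consumed (the helper below is illustrative, not part of this patch): map a grace-period state to a printable string, with a bounds check so a stale or corrupted state value cannot index past the array.

    static const char *gp_state_getname(int gs)
    {
        /* Hypothetical helper: bounds-check, then index gp_state_names[]. */
        if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names))
            return "???";
        return gp_state_names[gs];
    }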
+
extern struct list_head rcu_struct_flavors;
/* Sequence through rcu_state structures for each RCU flavor. */
@@ -664,3 +678,42 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
#else /* #ifdef CONFIG_PPC */
#define smp_mb__after_unlock_lock() do { } while (0)
#endif /* #else #ifdef CONFIG_PPC */
+
+/*
+ * Wrappers for the rcu_node::lock acquire.
+ *
+ * Because the rcu_nodes form a tree, the tree traversal locking will observe
+ * different lock values; this in turn means that an UNLOCK of one level
+ * followed by a LOCK of another level does not imply a full memory barrier,
+ * and most importantly, transitivity is lost.
+ *
+ * In order to restore full ordering between tree levels, augment the regular
+ * lock acquire functions with smp_mb__after_unlock_lock().
+ */
+static inline void raw_spin_lock_rcu_node(struct rcu_node *rnp)
+{
+ raw_spin_lock(&rnp->lock);
+ smp_mb__after_unlock_lock();
+}
+
+static inline void raw_spin_lock_irq_rcu_node(struct rcu_node *rnp)
+{
+ raw_spin_lock_irq(&rnp->lock);
+ smp_mb__after_unlock_lock();
+}
+
+#define raw_spin_lock_irqsave_rcu_node(rnp, flags) \
+do { \
+ typecheck(unsigned long, flags); \
+ raw_spin_lock_irqsave(&(rnp)->lock, flags); \
+ smp_mb__after_unlock_lock(); \
+} while (0)
+
+static inline bool raw_spin_trylock_rcu_node(struct rcu_node *rnp)
+{
+ bool locked = raw_spin_trylock(&rnp->lock);
+
+ if (locked)
+ smp_mb__after_unlock_lock();
+ return locked;
+}
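A usage sketch (hypothetical caller, not from this patch): the wrappers are drop-in replacements on the acquire side only; the matching unlock stays the plain primitive, since the extra barrier is needed only after the lock is taken.

    static void example_update_rnp(struct rcu_node *rnp)
    {
        unsigned long flags;

        raw_spin_lock_irqsave_rcu_node(rnp, flags);    /* lock + ordering fixup */
        /* ... modify fields protected by rnp->lock ... */
        raw_spin_unlock_irqrestore(&rnp->lock, flags); /* plain unlock is fine */
    }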
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 630c19772630..9467a8b7e756 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -63,8 +63,7 @@ static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */
/*
* Check the RCU kernel configuration parameters and print informative
- * messages about anything out of the ordinary. If you like #ifdef, you
- * will love this function.
+ * messages about anything out of the ordinary.
*/
static void __init rcu_bootup_announce_oddness(void)
{
@@ -147,8 +146,8 @@ static void __init rcu_bootup_announce(void)
* the corresponding expedited grace period will also be the end of the
* normal grace period.
*/
-static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp,
- unsigned long flags) __releases(rnp->lock)
+static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
+ __releases(rnp->lock) /* But leaves interrupts disabled. */
{
int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) +
(rnp->exp_tasks ? RCU_EXP_TASKS : 0) +
@@ -236,7 +235,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp,
rnp->gp_tasks = &t->rcu_node_entry;
if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
rnp->exp_tasks = &t->rcu_node_entry;
- raw_spin_unlock(&rnp->lock);
+ raw_spin_unlock(&rnp->lock); /* interrupts remain disabled. */
/*
* Report the quiescent state for the expedited GP. This expedited
@@ -251,7 +250,6 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp,
} else {
WARN_ON_ONCE(t->rcu_read_unlock_special.b.exp_need_qs);
}
- local_irq_restore(flags);
}
/*
@@ -286,12 +284,11 @@ static void rcu_preempt_qs(void)
* predating the current grace period drain, in other words, until
* rnp->gp_tasks becomes NULL.
*
- * Caller must disable preemption.
+ * Caller must disable interrupts.
*/
static void rcu_preempt_note_context_switch(void)
{
struct task_struct *t = current;
- unsigned long flags;
struct rcu_data *rdp;
struct rcu_node *rnp;
@@ -301,8 +298,7 @@ static void rcu_preempt_note_context_switch(void)
/* Possibly blocking in an RCU read-side critical section. */
rdp = this_cpu_ptr(rcu_state_p->rda);
rnp = rdp->mynode;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp);
t->rcu_read_unlock_special.b.blocked = true;
t->rcu_blocked_node = rnp;
@@ -318,7 +314,7 @@ static void rcu_preempt_note_context_switch(void)
(rnp->qsmask & rdp->grpmask)
? rnp->gpnum
: rnp->gpnum + 1);
- rcu_preempt_ctxt_queue(rnp, rdp, flags);
+ rcu_preempt_ctxt_queue(rnp, rdp);
} else if (t->rcu_read_lock_nesting < 0 &&
t->rcu_read_unlock_special.s) {
@@ -450,20 +446,13 @@ void rcu_read_unlock_special(struct task_struct *t)
/*
* Remove this task from the list it blocked on. The task
- * now remains queued on the rcu_node corresponding to
- * the CPU it first blocked on, so the first attempt to
- * acquire the task's rcu_node's ->lock will succeed.
- * Keep the loop and add a WARN_ON() out of sheer paranoia.
+ * now remains queued on the rcu_node corresponding to the
+ * CPU it first blocked on, so there is no longer any need
+ * to loop. Retain a WARN_ON_ONCE() out of sheer paranoia.
*/
- for (;;) {
- rnp = t->rcu_blocked_node;
- raw_spin_lock(&rnp->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock();
- if (rnp == t->rcu_blocked_node)
- break;
- WARN_ON_ONCE(1);
- raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
- }
+ rnp = t->rcu_blocked_node;
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
+ WARN_ON_ONCE(rnp != t->rcu_blocked_node);
empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
empty_exp = sync_rcu_preempt_exp_done(rnp);
smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
@@ -527,7 +516,7 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
unsigned long flags;
struct task_struct *t;
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (!rcu_preempt_blocked_readers_cgp(rnp)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
@@ -748,6 +737,12 @@ void synchronize_rcu_expedited(void)
struct rcu_state *rsp = rcu_state_p;
unsigned long s;
+ /* If expedited grace periods are prohibited, fall back to normal. */
+ if (rcu_gp_is_normal()) {
+ wait_rcu_gp(call_rcu);
+ return;
+ }
+
s = rcu_exp_gp_seq_snap(rsp);
rnp_unlock = exp_funnel_lock(rsp, s);
@@ -788,7 +783,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier);
*/
static void __init __rcu_init_preempt(void)
{
- rcu_init_one(rcu_state_p, rcu_data_p);
+ rcu_init_one(rcu_state_p);
}
/*
@@ -989,8 +984,7 @@ static int rcu_boost(struct rcu_node *rnp)
READ_ONCE(rnp->boost_tasks) == NULL)
return 0; /* Nothing left to boost. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
/*
* Recheck under the lock: all tasks in need of boosting
@@ -1176,8 +1170,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
"rcub/%d", rnp_index);
if (IS_ERR(t))
return PTR_ERR(t);
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
rnp->boost_kthread_task = t;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
sp.sched_priority = kthread_prio;
@@ -1524,7 +1517,8 @@ static void rcu_prepare_for_idle(void)
struct rcu_state *rsp;
int tne;
- if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL))
+ if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) ||
+ rcu_is_nocb_cpu(smp_processor_id()))
return;
/* Handle nohz enablement switches conservatively. */
@@ -1538,10 +1532,6 @@ static void rcu_prepare_for_idle(void)
if (!tne)
return;
- /* If this is a no-CBs CPU, no callbacks, just return. */
- if (rcu_is_nocb_cpu(smp_processor_id()))
- return;
-
/*
* If a non-lazy callback arrived at a CPU having only lazy
* callbacks, invoke RCU core for the side-effect of recalculating
@@ -1567,8 +1557,7 @@ static void rcu_prepare_for_idle(void)
if (!*rdp->nxttail[RCU_DONE_TAIL])
continue;
rnp = rdp->mynode;
- raw_spin_lock(&rnp->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
if (needwake)
@@ -2068,8 +2057,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
bool needwake;
struct rcu_node *rnp = rdp->mynode;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
needwake = rcu_start_future_gp(rnp, rdp, &c);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (needwake)
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index ef7093cc9b5c..1088e64f01ad 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -1,5 +1,5 @@
/*
- * Read-Copy Update tracing for classic implementation
+ * Read-Copy Update tracing for hierarchical implementation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,6 +16,7 @@
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* Copyright IBM Corporation, 2008
+ * Author: Paul E. McKenney
*
* Papers: http://www.rdrop.com/users/paulmck/RCU
*
@@ -33,9 +34,7 @@
#include <linux/sched.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
-#include <linux/module.h>
#include <linux/completion.h>
-#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
@@ -183,14 +182,20 @@ static const struct file_operations rcudata_fops = {
static int show_rcuexp(struct seq_file *m, void *v)
{
+ int cpu;
struct rcu_state *rsp = (struct rcu_state *)m->private;
-
+ struct rcu_data *rdp;
+ unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
+
+ for_each_possible_cpu(cpu) {
+ rdp = per_cpu_ptr(rsp->rda, cpu);
+ s0 += atomic_long_read(&rdp->expedited_workdone0);
+ s1 += atomic_long_read(&rdp->expedited_workdone1);
+ s2 += atomic_long_read(&rdp->expedited_workdone2);
+ s3 += atomic_long_read(&rdp->expedited_workdone3);
+ }
seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
- rsp->expedited_sequence,
- atomic_long_read(&rsp->expedited_workdone0),
- atomic_long_read(&rsp->expedited_workdone1),
- atomic_long_read(&rsp->expedited_workdone2),
- atomic_long_read(&rsp->expedited_workdone3),
+ rsp->expedited_sequence, s0, s1, s2, s3,
atomic_long_read(&rsp->expedited_normal),
atomic_read(&rsp->expedited_need_qs),
rsp->expedited_sequence / 2);
@@ -319,7 +324,7 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
unsigned long gpmax;
struct rcu_node *rnp = &rsp->node[0];
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
completed = READ_ONCE(rsp->completed);
gpnum = READ_ONCE(rsp->gpnum);
if (completed == gpnum)
@@ -487,16 +492,4 @@ free_out:
debugfs_remove_recursive(rcudir);
return 1;
}
-
-static void __exit rcutree_trace_cleanup(void)
-{
- debugfs_remove_recursive(rcudir);
-}
-
-
-module_init(rcutree_trace_init);
-module_exit(rcutree_trace_cleanup);
-
-MODULE_AUTHOR("Paul E. McKenney");
-MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation");
-MODULE_LICENSE("GPL");
+device_initcall(rcutree_trace_init);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 5f748c5a40f0..76b94e19430b 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -60,7 +60,12 @@ MODULE_ALIAS("rcupdate");
#endif
#define MODULE_PARAM_PREFIX "rcupdate."
+#ifndef CONFIG_TINY_RCU
module_param(rcu_expedited, int, 0);
+module_param(rcu_normal, int, 0);
+static int rcu_normal_after_boot;
+module_param(rcu_normal_after_boot, int, 0);
+#endif /* #ifndef CONFIG_TINY_RCU */
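Because MODULE_PARAM_PREFIX is "rcupdate." here, these knobs are settable from the kernel command line; as a usage sketch:

    rcupdate.rcu_normal=1
    rcupdate.rcu_normal_after_boot=1

The first forces expedited grace-period requests to fall back to normal grace periods from boot onward; the second leaves boot-time behavior alone and sets rcu_normal only once rcu_end_inkernel_boot() runs, as wired up later in this file.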
#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_PREEMPT_COUNT)
/**
@@ -113,6 +118,17 @@ EXPORT_SYMBOL(rcu_read_lock_sched_held);
#ifndef CONFIG_TINY_RCU
+/*
+ * Should expedited grace-period primitives always fall back to their
+ * non-expedited counterparts? Intended for use within RCU. Note
+ * that if the user specifies both rcu_expedited and rcu_normal, then
+ * rcu_normal wins.
+ */
+bool rcu_gp_is_normal(void)
+{
+ return READ_ONCE(rcu_normal);
+}
+
static atomic_t rcu_expedited_nesting =
ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0);
@@ -157,8 +173,6 @@ void rcu_unexpedite_gp(void)
}
EXPORT_SYMBOL_GPL(rcu_unexpedite_gp);
-#endif /* #ifndef CONFIG_TINY_RCU */
-
/*
* Inform RCU of the end of the in-kernel boot sequence.
*/
@@ -166,8 +180,12 @@ void rcu_end_inkernel_boot(void)
{
if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT))
rcu_unexpedite_gp();
+ if (rcu_normal_after_boot)
+ WRITE_ONCE(rcu_normal, 1);
}
+#endif /* #ifndef CONFIG_TINY_RCU */
+
#ifdef CONFIG_PREEMPT_RCU
/*
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 750ed601ddf7..a5d966cb8891 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -212,7 +212,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
ag = autogroup_task_get(p);
down_write(&ag->lock);
- err = sched_group_set_shares(ag->tg, prio_to_weight[nice + 20]);
+ err = sched_group_set_shares(ag->tg, sched_prio_to_weight[nice + 20]);
if (!err)
ag->nice = nice;
up_write(&ag->lock);
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index c0a205101c23..bc54e84675da 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -1,7 +1,7 @@
/*
* sched_clock for unstable cpu clocks
*
- * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra
*
* Updates and enhancements:
* Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com>
@@ -354,7 +354,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
return;
sched_clock_tick();
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sched();
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4d568ac9319e..77d97a6fc715 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -731,7 +731,7 @@ bool sched_can_stop_tick(void)
if (current->policy == SCHED_RR) {
struct sched_rt_entity *rt_se = &current->rt;
- return rt_se->run_list.prev == rt_se->run_list.next;
+ return list_is_singular(&rt_se->run_list);
}
/*
@@ -823,8 +823,8 @@ static void set_load_weight(struct task_struct *p)
return;
}
- load->weight = scale_load(prio_to_weight[prio]);
- load->inv_weight = prio_to_wmult[prio];
+ load->weight = scale_load(sched_prio_to_weight[prio]);
+ load->inv_weight = sched_prio_to_wmult[prio];
}
static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1071,8 +1071,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new
{
lockdep_assert_held(&rq->lock);
- dequeue_task(rq, p, 0);
p->on_rq = TASK_ON_RQ_MIGRATING;
+ dequeue_task(rq, p, 0);
set_task_cpu(p, new_cpu);
raw_spin_unlock(&rq->lock);
@@ -1080,8 +1080,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new
raw_spin_lock(&rq->lock);
BUG_ON(task_cpu(p) != new_cpu);
- p->on_rq = TASK_ON_RQ_QUEUED;
enqueue_task(rq, p, 0);
+ p->on_rq = TASK_ON_RQ_QUEUED;
check_preempt_curr(rq, p, 0);
return rq;
@@ -1274,6 +1274,15 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
!p->on_rq);
+ /*
+ * A migrating fair-class task must have p->on_rq == TASK_ON_RQ_MIGRATING,
+ * because schedstat_wait_{start,end} rebase a migrating task's wait_start
+ * time based on the value of p->on_rq.
+ */
+ WARN_ON_ONCE(p->state == TASK_RUNNING &&
+ p->sched_class == &fair_sched_class &&
+ (p->on_rq && !task_on_rq_migrating(p)));
+
#ifdef CONFIG_LOCKDEP
/*
* The caller should hold either p->pi_lock or rq->lock, when changing
@@ -1310,9 +1319,11 @@ static void __migrate_swap_task(struct task_struct *p, int cpu)
src_rq = task_rq(p);
dst_rq = cpu_rq(cpu);
+ p->on_rq = TASK_ON_RQ_MIGRATING;
deactivate_task(src_rq, p, 0);
set_task_cpu(p, cpu);
activate_task(dst_rq, p, 0);
+ p->on_rq = TASK_ON_RQ_QUEUED;
check_preempt_curr(dst_rq, p, 0);
} else {
/*
@@ -1905,6 +1916,97 @@ static void ttwu_queue(struct task_struct *p, int cpu)
raw_spin_unlock(&rq->lock);
}
+/*
+ * Notes on Program-Order guarantees on SMP systems.
+ *
+ * MIGRATION
+ *
+ * The basic program-order guarantee on SMP systems is that when a task [t]
+ * migrates, all its activity on its old cpu [c0] happens-before any subsequent
+ * execution on its new cpu [c1].
+ *
+ * For migration (of runnable tasks) this is provided by the following means:
+ *
+ * A) UNLOCK of the rq(c0)->lock scheduling out task t
+ * B) migration for t is required to synchronize *both* rq(c0)->lock and
+ * rq(c1)->lock (if not at the same time, then in that order).
+ * C) LOCK of the rq(c1)->lock scheduling in task
+ *
+ * Transitivity guarantees that B happens after A and C after B.
+ * Note: we only require RCpc transitivity.
+ * Note: the cpu doing B need not be c0 or c1.
+ *
+ * Example:
+ *
+ * CPU0 CPU1 CPU2
+ *
+ * LOCK rq(0)->lock
+ * sched-out X
+ * sched-in Y
+ * UNLOCK rq(0)->lock
+ *
+ * LOCK rq(0)->lock // orders against CPU0
+ * dequeue X
+ * UNLOCK rq(0)->lock
+ *
+ * LOCK rq(1)->lock
+ * enqueue X
+ * UNLOCK rq(1)->lock
+ *
+ * LOCK rq(1)->lock // orders against CPU2
+ * sched-out Z
+ * sched-in X
+ * UNLOCK rq(1)->lock
+ *
+ *
+ * BLOCKING -- aka. SLEEP + WAKEUP
+ *
+ * For blocking we (obviously) need to provide the same guarantee as for
+ * migration. However the means are completely different as there is no lock
+ * chain to provide order. Instead we do:
+ *
+ * 1) smp_store_release(X->on_cpu, 0)
+ * 2) smp_cond_acquire(!X->on_cpu)
+ *
+ * Example:
+ *
+ * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule)
+ *
+ * LOCK rq(0)->lock LOCK X->pi_lock
+ * dequeue X
+ * sched-out X
+ * smp_store_release(X->on_cpu, 0);
+ *
+ * smp_cond_acquire(!X->on_cpu);
+ * X->state = WAKING
+ * set_task_cpu(X,2)
+ *
+ * LOCK rq(2)->lock
+ * enqueue X
+ * X->state = RUNNING
+ * UNLOCK rq(2)->lock
+ *
+ * LOCK rq(2)->lock // orders against CPU1
+ * sched-out Z
+ * sched-in X
+ * UNLOCK rq(2)->lock
+ *
+ * UNLOCK X->pi_lock
+ * UNLOCK rq(0)->lock
+ *
+ *
+ * However, for wakeups there is a second guarantee we must provide, namely we
+ * must observe the state that led to our wakeup. That is, not only must our
+ * task observe its own prior state, it must also observe the stores prior to
+ * its wakeup.
+ *
+ * This means that any means of doing remote wakeups must order the CPU doing
+ * the wakeup against the CPU the task is going to end up running on. This,
+ * however, is already required for the regular Program-Order guarantee above,
+ * since the waking CPU is the one issuing the ACQUIRE (smp_cond_acquire).
+ *
+ */
+
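To make the BLOCKING handshake concrete, here is a minimal sketch of the two primitives in isolation; smp_cond_acquire() is assumed to be the control-dependency helper this series relies on, roughly a while (!(cond)) cpu_relax(); loop followed by smp_rmb().

    /* Sleeping CPU, at the tail of schedule() (see finish_lock_switch()): */
    smp_store_release(&prev->on_cpu, 0);   /* publish all prior stores */

    /* Waking CPU, in try_to_wake_up(): */
    smp_cond_acquire(!p->on_cpu);          /* spin, then acquire */
    /* Everything the sleeping CPU did before the release is now visible. */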
/**
* try_to_wake_up - wake up a thread
* @p: the thread to be awakened
@@ -1947,15 +2049,34 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
#ifdef CONFIG_SMP
/*
- * If the owning (remote) cpu is still in the middle of schedule() with
- * this task as prev, wait until its done referencing the task.
+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
+ * possible to, falsely, observe p->on_cpu == 0.
+ *
+ * One must be running (->on_cpu == 1) in order to remove oneself
+ * from the runqueue.
+ *
+ * [S] ->on_cpu = 1; [L] ->on_rq
+ * UNLOCK rq->lock
+ * RMB
+ * LOCK rq->lock
+ * [S] ->on_rq = 0; [L] ->on_cpu
+ *
+ * Pairs with the full barrier implied in the UNLOCK+LOCK on rq->lock
+ * from the consecutive calls to schedule(); the first switching to our
+ * task, the second putting it to sleep.
*/
- while (p->on_cpu)
- cpu_relax();
+ smp_rmb();
+
/*
- * Pairs with the smp_wmb() in finish_lock_switch().
+ * If the owning (remote) cpu is still in the middle of schedule() with
+ * this task as prev, wait until it's done referencing the task.
+ *
+ * Pairs with the smp_store_release() in finish_lock_switch().
+ *
+ * This ensures that tasks getting woken will be fully ordered against
+ * their previous state and preserve Program Order.
*/
- smp_rmb();
+ smp_cond_acquire(!p->on_cpu);
p->sched_contributes_to_load = !!task_contributes_to_load(p);
p->state = TASK_WAKING;
@@ -2039,7 +2160,6 @@ out:
*/
int wake_up_process(struct task_struct *p)
{
- WARN_ON(task_is_stopped_or_traced(p));
return try_to_wake_up(p, TASK_NORMAL, 0);
}
EXPORT_SYMBOL(wake_up_process);
@@ -2085,6 +2205,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.vruntime = 0;
INIT_LIST_HEAD(&p->se.group_node);
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ p->se.cfs_rq = NULL;
+#endif
+
#ifdef CONFIG_SCHEDSTATS
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
#endif
@@ -3085,7 +3209,6 @@ static void __sched notrace __schedule(bool preempt)
cpu = smp_processor_id();
rq = cpu_rq(cpu);
- rcu_note_context_switch();
prev = rq->curr;
/*
@@ -3104,13 +3227,16 @@ static void __sched notrace __schedule(bool preempt)
if (sched_feat(HRTICK))
hrtick_clear(rq);
+ local_irq_disable();
+ rcu_note_context_switch();
+
/*
* Make sure that signal_pending_state()->signal_pending() below
* can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
* done by the caller to avoid the race with signal_wake_up().
*/
smp_mb__before_spinlock();
- raw_spin_lock_irq(&rq->lock);
+ raw_spin_lock(&rq->lock);
lockdep_pin_lock(&rq->lock);
rq->clock_skip_update <<= 1; /* promote REQ to ACT */
@@ -5847,13 +5973,13 @@ static int init_rootdomain(struct root_domain *rd)
{
memset(rd, 0, sizeof(*rd));
- if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL))
goto out;
- if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL))
goto free_span;
- if (!alloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
goto free_online;
- if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
goto free_dlo_mask;
init_dl_bw(&rd->dl_bw);
@@ -7331,6 +7457,9 @@ int in_sched_functions(unsigned long addr)
*/
struct task_group root_task_group;
LIST_HEAD(task_groups);
+
+/* Cacheline aligned slab cache for task_group */
+static struct kmem_cache *task_group_cache __read_mostly;
#endif
DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
@@ -7388,11 +7517,12 @@ void __init sched_init(void)
#endif /* CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_CGROUP_SCHED
+ task_group_cache = KMEM_CACHE(task_group, 0);
+
list_add(&root_task_group.list, &task_groups);
INIT_LIST_HEAD(&root_task_group.children);
INIT_LIST_HEAD(&root_task_group.siblings);
autogroup_init(&init_task);
-
#endif /* CONFIG_CGROUP_SCHED */
for_each_possible_cpu(i) {
@@ -7673,7 +7803,7 @@ static void free_sched_group(struct task_group *tg)
free_fair_sched_group(tg);
free_rt_sched_group(tg);
autogroup_free(tg);
- kfree(tg);
+ kmem_cache_free(task_group_cache, tg);
}
/* allocate runqueue etc for a new task group */
@@ -7681,7 +7811,7 @@ struct task_group *sched_create_group(struct task_group *parent)
{
struct task_group *tg;
- tg = kzalloc(sizeof(*tg), GFP_KERNEL);
+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO);
if (!tg)
return ERR_PTR(-ENOMEM);
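For reference, the KMEM_CACHE() invocation in sched_init() expands (per the slab API) to roughly the call below; allocating from a dedicated cache honors __alignof__(struct task_group), which the ____cacheline_aligned load_avg field raises to a full cacheline, something a bare kzalloc() does not guarantee.

    /* Sketch of the KMEM_CACHE(task_group, 0) expansion: */
    task_group_cache = kmem_cache_create("task_group",
                                         sizeof(struct task_group),
                                         __alignof__(struct task_group),
                                         0, NULL);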
@@ -8217,12 +8347,12 @@ static void cpu_cgroup_fork(struct task_struct *task, void *private)
sched_move_task(task);
}
-static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
+ struct cgroup_subsys_state *css;
- cgroup_taskset_for_each(task, tset) {
+ cgroup_taskset_for_each(task, css, tset) {
#ifdef CONFIG_RT_GROUP_SCHED
if (!sched_rt_can_attach(css_tg(css), task))
return -EINVAL;
@@ -8235,12 +8365,12 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
return 0;
}
-static void cpu_cgroup_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void cpu_cgroup_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
+ struct cgroup_subsys_state *css;
- cgroup_taskset_for_each(task, tset)
+ cgroup_taskset_for_each(task, css, tset)
sched_move_task(task);
}
@@ -8586,3 +8716,44 @@ void dump_cpu_task(int cpu)
pr_info("Task dump for CPU %d:\n", cpu);
sched_show_task(cpu_curr(cpu));
}
+
+/*
+ * Nice levels are multiplicative, with a gentle 10% change for every
+ * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
+ * nice 1, it will get ~10% less CPU time than another CPU-bound task
+ * that remained on nice 0.
+ *
+ * The "10% effect" is relative and cumulative: from _any_ nice level,
+ * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
+ * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
+ * If a task goes up by ~10% and another task goes down by ~10% then
+ * the relative distance between them is ~25%.)
+ */
+const int sched_prio_to_weight[40] = {
+ /* -20 */ 88761, 71755, 56483, 46273, 36291,
+ /* -15 */ 29154, 23254, 18705, 14949, 11916,
+ /* -10 */ 9548, 7620, 6100, 4904, 3906,
+ /* -5 */ 3121, 2501, 1991, 1586, 1277,
+ /* 0 */ 1024, 820, 655, 526, 423,
+ /* 5 */ 335, 272, 215, 172, 137,
+ /* 10 */ 110, 87, 70, 56, 45,
+ /* 15 */ 36, 29, 23, 18, 15,
+};
+
+/*
+ * Inverse (2^32/x) values of the sched_prio_to_weight[] array, precalculated.
+ *
+ * In cases where the weight does not change often, we can use the
+ * precalculated inverse to speed up arithmetic by turning divisions
+ * into multiplications:
+ */
+const u32 sched_prio_to_wmult[40] = {
+ /* -20 */ 48388, 59856, 76040, 92818, 118348,
+ /* -15 */ 147320, 184698, 229616, 287308, 360437,
+ /* -10 */ 449829, 563644, 704093, 875809, 1099582,
+ /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326,
+ /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587,
+ /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126,
+ /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
+ /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
+};
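As a worked sketch of how the inverse table is used (the helper is hypothetical; the real consumer is __calc_delta() operating on struct load_weight): since wmult[i] is about 2^32 / weight[i], a division by a weight becomes a multiply and a 32-bit shift.

    /* Illustrative only; ignores the overflow handling __calc_delta() does. */
    static inline u64 div_by_prio_weight(u64 x, int idx)
    {
        return (x * sched_prio_to_wmult[idx]) >> 32; /* ~ x / sched_prio_to_weight[idx] */
    }

For instance, idx 20 (nice 0) gives wmult = 4194304 = 2^22, so the helper computes (x << 22) >> 32 = x / 1024, matching weight 1024.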
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 26a54461bf59..d5ff5c6bf829 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -466,7 +466,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
- if (vtime_accounting_enabled())
+ if (vtime_accounting_cpu_enabled())
return;
if (sched_clock_irqtime) {
@@ -680,7 +680,7 @@ static cputime_t get_vtime_delta(struct task_struct *tsk)
{
unsigned long long delta = vtime_delta(tsk);
- WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
+ WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
tsk->vtime_snap += delta;
/* CHECKME: always safe to convert nsecs to cputime? */
@@ -696,37 +696,37 @@ static void __vtime_account_system(struct task_struct *tsk)
void vtime_account_system(struct task_struct *tsk)
{
- write_seqlock(&tsk->vtime_seqlock);
+ write_seqcount_begin(&tsk->vtime_seqcount);
__vtime_account_system(tsk);
- write_sequnlock(&tsk->vtime_seqlock);
+ write_seqcount_end(&tsk->vtime_seqcount);
}
void vtime_gen_account_irq_exit(struct task_struct *tsk)
{
- write_seqlock(&tsk->vtime_seqlock);
+ write_seqcount_begin(&tsk->vtime_seqcount);
__vtime_account_system(tsk);
if (context_tracking_in_user())
tsk->vtime_snap_whence = VTIME_USER;
- write_sequnlock(&tsk->vtime_seqlock);
+ write_seqcount_end(&tsk->vtime_seqcount);
}
void vtime_account_user(struct task_struct *tsk)
{
cputime_t delta_cpu;
- write_seqlock(&tsk->vtime_seqlock);
+ write_seqcount_begin(&tsk->vtime_seqcount);
delta_cpu = get_vtime_delta(tsk);
tsk->vtime_snap_whence = VTIME_SYS;
account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
- write_sequnlock(&tsk->vtime_seqlock);
+ write_seqcount_end(&tsk->vtime_seqcount);
}
void vtime_user_enter(struct task_struct *tsk)
{
- write_seqlock(&tsk->vtime_seqlock);
+ write_seqcount_begin(&tsk->vtime_seqcount);
__vtime_account_system(tsk);
tsk->vtime_snap_whence = VTIME_USER;
- write_sequnlock(&tsk->vtime_seqlock);
+ write_seqcount_end(&tsk->vtime_seqcount);
}
void vtime_guest_enter(struct task_struct *tsk)
@@ -738,19 +738,19 @@ void vtime_guest_enter(struct task_struct *tsk)
* synchronization against the reader (task_gtime())
* that can thus safely catch up with a tickless delta.
*/
- write_seqlock(&tsk->vtime_seqlock);
+ write_seqcount_begin(&tsk->vtime_seqcount);
__vtime_account_system(tsk);
current->flags |= PF_VCPU;
- write_sequnlock(&tsk->vtime_seqlock);
+ write_seqcount_end(&tsk->vtime_seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);
void vtime_guest_exit(struct task_struct *tsk)
{
- write_seqlock(&tsk->vtime_seqlock);
+ write_seqcount_begin(&tsk->vtime_seqcount);
__vtime_account_system(tsk);
current->flags &= ~PF_VCPU;
- write_sequnlock(&tsk->vtime_seqlock);
+ write_seqcount_end(&tsk->vtime_seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);
@@ -763,24 +763,26 @@ void vtime_account_idle(struct task_struct *tsk)
void arch_vtime_task_switch(struct task_struct *prev)
{
- write_seqlock(&prev->vtime_seqlock);
- prev->vtime_snap_whence = VTIME_SLEEPING;
- write_sequnlock(&prev->vtime_seqlock);
+ write_seqcount_begin(&prev->vtime_seqcount);
+ prev->vtime_snap_whence = VTIME_INACTIVE;
+ write_seqcount_end(&prev->vtime_seqcount);
- write_seqlock(&current->vtime_seqlock);
+ write_seqcount_begin(&current->vtime_seqcount);
current->vtime_snap_whence = VTIME_SYS;
current->vtime_snap = sched_clock_cpu(smp_processor_id());
- write_sequnlock(&current->vtime_seqlock);
+ write_seqcount_end(&current->vtime_seqcount);
}
void vtime_init_idle(struct task_struct *t, int cpu)
{
unsigned long flags;
- write_seqlock_irqsave(&t->vtime_seqlock, flags);
+ local_irq_save(flags);
+ write_seqcount_begin(&t->vtime_seqcount);
t->vtime_snap_whence = VTIME_SYS;
t->vtime_snap = sched_clock_cpu(cpu);
- write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
+ write_seqcount_end(&t->vtime_seqcount);
+ local_irq_restore(flags);
}
cputime_t task_gtime(struct task_struct *t)
@@ -788,14 +790,17 @@ cputime_t task_gtime(struct task_struct *t)
unsigned int seq;
cputime_t gtime;
+ if (!vtime_accounting_enabled())
+ return t->gtime;
+
do {
- seq = read_seqbegin(&t->vtime_seqlock);
+ seq = read_seqcount_begin(&t->vtime_seqcount);
gtime = t->gtime;
- if (t->flags & PF_VCPU)
+ if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU)
gtime += vtime_delta(t);
- } while (read_seqretry(&t->vtime_seqlock, seq));
+ } while (read_seqcount_retry(&t->vtime_seqcount, seq));
return gtime;
}
@@ -818,7 +823,7 @@ fetch_task_cputime(struct task_struct *t,
*udelta = 0;
*sdelta = 0;
- seq = read_seqbegin(&t->vtime_seqlock);
+ seq = read_seqcount_begin(&t->vtime_seqcount);
if (u_dst)
*u_dst = *u_src;
@@ -826,7 +831,7 @@ fetch_task_cputime(struct task_struct *t,
*s_dst = *s_src;
/* Task is sleeping, nothing to add */
- if (t->vtime_snap_whence == VTIME_SLEEPING ||
+ if (t->vtime_snap_whence == VTIME_INACTIVE ||
is_idle_task(t))
continue;
@@ -842,7 +847,7 @@ fetch_task_cputime(struct task_struct *t,
if (t->vtime_snap_whence == VTIME_SYS)
*sdelta = delta;
}
- } while (read_seqretry(&t->vtime_seqlock, seq));
+ } while (read_seqcount_retry(&t->vtime_seqcount, seq));
}
@@ -850,6 +855,14 @@ void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
{
cputime_t udelta, sdelta;
+ if (!vtime_accounting_enabled()) {
+ if (utime)
+ *utime = t->utime;
+ if (stime)
+ *stime = t->stime;
+ return;
+ }
+
fetch_task_cputime(t, utime, stime, &t->utime,
&t->stime, &udelta, &sdelta);
if (utime)
@@ -863,6 +876,14 @@ void task_cputime_scaled(struct task_struct *t,
{
cputime_t udelta, sdelta;
+ if (!vtime_accounting_enabled()) {
+ if (utimescaled)
+ *utimescaled = t->utimescaled;
+ if (stimescaled)
+ *stimescaled = t->stimescaled;
+ return;
+ }
+
fetch_task_cputime(t, utimescaled, stimescaled,
&t->utimescaled, &t->stimescaled, &udelta, &sdelta);
if (utimescaled)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 8b0a15e285f9..cd64c979d0e1 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -176,8 +176,10 @@ static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
}
}
- if (leftmost)
+ if (leftmost) {
dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks;
+ dl_rq->earliest_dl.next = p->dl.deadline;
+ }
rb_link_node(&p->pushable_dl_tasks, parent, link);
rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
@@ -195,6 +197,10 @@ static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
next_node = rb_next(&p->pushable_dl_tasks);
dl_rq->pushable_dl_tasks_leftmost = next_node;
+ if (next_node) {
+ dl_rq->earliest_dl.next = rb_entry(next_node,
+ struct task_struct, pushable_dl_tasks)->dl.deadline;
+ }
}
rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
@@ -782,42 +788,14 @@ static void update_curr_dl(struct rq *rq)
#ifdef CONFIG_SMP
-static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu);
-
-static inline u64 next_deadline(struct rq *rq)
-{
- struct task_struct *next = pick_next_earliest_dl_task(rq, rq->cpu);
-
- if (next && dl_prio(next->prio))
- return next->dl.deadline;
- else
- return 0;
-}
-
static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
{
struct rq *rq = rq_of_dl_rq(dl_rq);
if (dl_rq->earliest_dl.curr == 0 ||
dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
- /*
- * If the dl_rq had no -deadline tasks, or if the new task
- * has shorter deadline than the current one on dl_rq, we
- * know that the previous earliest becomes our next earliest,
- * as the new task becomes the earliest itself.
- */
- dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr;
dl_rq->earliest_dl.curr = deadline;
cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
- } else if (dl_rq->earliest_dl.next == 0 ||
- dl_time_before(deadline, dl_rq->earliest_dl.next)) {
- /*
- * On the other hand, if the new -deadline task has a
- * a later deadline than the earliest one on dl_rq, but
- * it is earlier than the next (if any), we must
- * recompute the next-earliest.
- */
- dl_rq->earliest_dl.next = next_deadline(rq);
}
}
@@ -839,7 +817,6 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
dl_rq->earliest_dl.curr = entry->deadline;
- dl_rq->earliest_dl.next = next_deadline(rq);
cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1);
}
}
@@ -1274,28 +1251,6 @@ static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
return 0;
}
-/* Returns the second earliest -deadline task, NULL otherwise */
-static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu)
-{
- struct rb_node *next_node = rq->dl.rb_leftmost;
- struct sched_dl_entity *dl_se;
- struct task_struct *p = NULL;
-
-next_node:
- next_node = rb_next(next_node);
- if (next_node) {
- dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node);
- p = dl_task_of(dl_se);
-
- if (pick_dl_task(rq, p, cpu))
- return p;
-
- goto next_node;
- }
-
- return NULL;
-}
-
/*
* Return the earliest pushable rq's task, which is suitable to be executed
* on the CPU, NULL otherwise:
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f04fda8f669c..1926606ece80 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -17,7 +17,7 @@
* Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
*
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
*/
#include <linux/latencytop.h>
@@ -738,12 +738,56 @@ static void update_curr_fair(struct rq *rq)
update_curr(cfs_rq_of(&rq->curr->se));
}
+#ifdef CONFIG_SCHEDSTATS
+static inline void
+update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ u64 wait_start = rq_clock(rq_of(cfs_rq));
+
+ if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
+ likely(wait_start > se->statistics.wait_start))
+ wait_start -= se->statistics.wait_start;
+
+ se->statistics.wait_start = wait_start;
+}
+
+static void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ struct task_struct *p;
+ u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+
+ if (entity_is_task(se)) {
+ p = task_of(se);
+ if (task_on_rq_migrating(p)) {
+ /*
+ * Preserve migrating task's wait time so wait_start
+ * time stamp can be adjusted to accumulate wait time
+ * prior to migration.
+ */
+ se->statistics.wait_start = delta;
+ return;
+ }
+ trace_sched_stat_wait(p, delta);
+ }
+
+ se->statistics.wait_max = max(se->statistics.wait_max, delta);
+ se->statistics.wait_count++;
+ se->statistics.wait_sum += delta;
+ se->statistics.wait_start = 0;
+}
+#else
static inline void
update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- schedstat_set(se->statistics.wait_start, rq_clock(rq_of(cfs_rq)));
}
+static inline void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+#endif
+
/*
* Task is being enqueued - update stats:
*/
@@ -757,23 +801,6 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
update_stats_wait_start(cfs_rq, se);
}
-static void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
- schedstat_set(se->statistics.wait_max, max(se->statistics.wait_max,
- rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start));
- schedstat_set(se->statistics.wait_count, se->statistics.wait_count + 1);
- schedstat_set(se->statistics.wait_sum, se->statistics.wait_sum +
- rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start);
-#ifdef CONFIG_SCHEDSTATS
- if (entity_is_task(se)) {
- trace_sched_stat_wait(task_of(se),
- rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start);
- }
-#endif
- schedstat_set(se->statistics.wait_start, 0);
-}
-
static inline void
update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
@@ -2155,6 +2182,7 @@ void task_numa_work(struct callback_head *work)
unsigned long migrate, next_scan, now = jiffies;
struct task_struct *p = current;
struct mm_struct *mm = p->mm;
+ u64 runtime = p->se.sum_exec_runtime;
struct vm_area_struct *vma;
unsigned long start, end;
unsigned long nr_pte_updates = 0;
@@ -2277,6 +2305,17 @@ out:
else
reset_ptenuma_scan(p);
up_read(&mm->mmap_sem);
+
+ /*
+ * Make sure tasks use at least 32x as much time to run other code
+ * than they used here, to limit NUMA PTE scanning overhead to 3% max.
+ * Usually update_task_scan_period slows down scanning enough; on an
+ * overloaded system we need to limit overhead on a per-task basis.
+ */
+ if (unlikely(p->se.sum_exec_runtime != runtime)) {
+ u64 diff = p->se.sum_exec_runtime - runtime;
+ p->node_stamp += 32 * diff;
+ }
}
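To see where the 3% bound comes from: if the scan pass above consumed diff ns of runtime, advancing node_stamp by 32 * diff defers the next pass until roughly 32 times that much runtime has elapsed elsewhere, so scanning consumes at most 1/(1 + 32), about 3%, of the task's time.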
/*
@@ -2670,12 +2709,64 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
{
long delta = cfs_rq->avg.load_avg - cfs_rq->tg_load_avg_contrib;
+ /*
+ * No need to update load_avg for root_task_group as it is not used.
+ */
+ if (cfs_rq->tg == &root_task_group)
+ return;
+
if (force || abs(delta) > cfs_rq->tg_load_avg_contrib / 64) {
atomic_long_add(delta, &cfs_rq->tg->load_avg);
cfs_rq->tg_load_avg_contrib = cfs_rq->avg.load_avg;
}
}
+/*
+ * Called within set_task_rq() right before setting a task's cpu. The
+ * caller only guarantees p->pi_lock is held; no other assumptions,
+ * including the state of rq->lock, should be made.
+ */
+void set_task_rq_fair(struct sched_entity *se,
+ struct cfs_rq *prev, struct cfs_rq *next)
+{
+ if (!sched_feat(ATTACH_AGE_LOAD))
+ return;
+
+ /*
+ * We are supposed to update the task to "current" time, so that it is
+ * up to date and ready to go to the new CPU/cfs_rq. But determining the
+ * current time here is difficult, so simply discard the out-of-date
+ * time. This leaves the wakee task less decayed, but giving the wakee
+ * more load does not sound bad.
+ */
+ if (se->avg.last_update_time && prev) {
+ u64 p_last_update_time;
+ u64 n_last_update_time;
+
+#ifndef CONFIG_64BIT
+ u64 p_last_update_time_copy;
+ u64 n_last_update_time_copy;
+
+ do {
+ p_last_update_time_copy = prev->load_last_update_time_copy;
+ n_last_update_time_copy = next->load_last_update_time_copy;
+
+ smp_rmb();
+
+ p_last_update_time = prev->avg.last_update_time;
+ n_last_update_time = next->avg.last_update_time;
+
+ } while (p_last_update_time != p_last_update_time_copy ||
+ n_last_update_time != n_last_update_time_copy);
+#else
+ p_last_update_time = prev->avg.last_update_time;
+ n_last_update_time = next->avg.last_update_time;
+#endif
+ __update_load_avg(p_last_update_time, cpu_of(rq_of(prev)),
+ &se->avg, 0, 0, NULL);
+ se->avg.last_update_time = n_last_update_time;
+ }
+}
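The do/while over the *_copy fields above is the reader half of a torn-read guard for 64-bit values on 32-bit kernels; the writer half, sketched here after the pattern used by update_cfs_rq_load_avg(), stores the value first and the copy second with a write barrier in between, so a reader seeing matching values knows its 64-bit load was not torn.

    /* Writer-side sketch (assumed pattern, 32-bit only): */
    cfs_rq->avg.last_update_time = now;
    smp_wmb();                          /* order value before copy */
    cfs_rq->load_last_update_time_copy = cfs_rq->avg.last_update_time;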
#else /* CONFIG_FAIR_GROUP_SCHED */
static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -2689,7 +2780,7 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
int decayed, removed = 0;
if (atomic_long_read(&cfs_rq->removed_load_avg)) {
- long r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
+ s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
sa->load_avg = max_t(long, sa->load_avg - r, 0);
sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
removed = 1;
@@ -2809,48 +2900,48 @@ dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
}
-/*
- * Task first catches up with cfs_rq, and then subtract
- * itself from the cfs_rq (task must be off the queue now).
- */
-void remove_entity_load_avg(struct sched_entity *se)
-{
- struct cfs_rq *cfs_rq = cfs_rq_of(se);
- u64 last_update_time;
-
#ifndef CONFIG_64BIT
+static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
+{
u64 last_update_time_copy;
+ u64 last_update_time;
do {
last_update_time_copy = cfs_rq->load_last_update_time_copy;
smp_rmb();
last_update_time = cfs_rq->avg.last_update_time;
} while (last_update_time != last_update_time_copy);
-#else
- last_update_time = cfs_rq->avg.last_update_time;
-#endif
- __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
- atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
- atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
+ return last_update_time;
}
-
-/*
- * Update the rq's load with the elapsed running time before entering
- * idle. if the last scheduled task is not a CFS task, idle_enter will
- * be the only way to update the runnable statistic.
- */
-void idle_enter_fair(struct rq *this_rq)
+#else
+static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
{
+ return cfs_rq->avg.last_update_time;
}
+#endif
/*
- * Update the rq's load with the elapsed idle time before a task is
- * scheduled. if the newly scheduled task is not a CFS task, idle_exit will
- * be the only way to update the runnable statistic.
+ * Task first catches up with cfs_rq, and then subtracts
+ * itself from the cfs_rq (the task must be off the queue by now).
*/
-void idle_exit_fair(struct rq *this_rq)
+void remove_entity_load_avg(struct sched_entity *se)
{
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ u64 last_update_time;
+
+ /*
+ * A newly created task or a never-used group entity should not be
+ * removed from its (source) cfs_rq.
+ */
+ if (se->avg.last_update_time == 0)
+ return;
+
+ last_update_time = cfs_rq_last_update_time(cfs_rq);
+
+ __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
+ atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
+ atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
}
static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq)
@@ -4240,42 +4331,37 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
*/
/*
- * The exact cpuload at various idx values, calculated at every tick would be
- * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
+ * The exact cpuload calculated at every tick would be:
+ *
+ * load' = (1 - 1/2^i) * load + (1/2^i) * cur_load
*
- * If a cpu misses updates for n-1 ticks (as it was idle) and update gets called
- * on nth tick when cpu may be busy, then we have:
- * load = ((2^idx - 1) / 2^idx)^(n-1) * load
- * load = (2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load
+ * If a cpu misses updates for n ticks (as it was idle) and update gets
+ * called on the n+1-th tick when cpu may be busy, then we have:
+ *
+ * load_n = (1 - 1/2^i)^n * load_0
+ * load_n+1 = (1 - 1/2^i) * load_n + (1/2^i) * cur_load
*
* decay_load_missed() below does efficient calculation of
- * load = ((2^idx - 1) / 2^idx)^(n-1) * load
- * avoiding 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load
+ *
+ * load' = (1 - 1/2^i)^n * load
+ *
+ * Because x^(n+m) = x^n * x^m, we can decompose any x^n into power-of-2 factors.
+ * This allows us to precompute the above in said factors, thereby allowing the
+ * reduction of an arbitrary n in O(log_2 n) steps. (See also
+ * fixed_power_int())
*
* The calculation is approximated on a 128 point scale.
- * degrade_zero_ticks is the number of ticks after which load at any
- * particular idx is approximated to be zero.
- * degrade_factor is a precomputed table, a row for each load idx.
- * Each column corresponds to degradation factor for a power of two ticks,
- * based on 128 point scale.
- * Example:
- * row 2, col 3 (=12) says that the degradation at load idx 2 after
- * 8 ticks is 12/128 (which is an approximation of exact factor 3^8/4^8).
- *
- * With this power of 2 load factors, we can degrade the load n times
- * by looking at 1 bits in n and doing as many mult/shift instead of
- * n mult/shifts needed by the exact degradation.
*/
#define DEGRADE_SHIFT 7
-static const unsigned char
- degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
-static const unsigned char
- degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
- {0, 0, 0, 0, 0, 0, 0, 0},
- {64, 32, 8, 0, 0, 0, 0, 0},
- {96, 72, 40, 12, 1, 0, 0},
- {112, 98, 75, 43, 15, 1, 0},
- {120, 112, 98, 76, 45, 16, 2} };
+
+static const u8 degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
+static const u8 degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
+ { 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 64, 32, 8, 0, 0, 0, 0, 0 },
+ { 96, 72, 40, 12, 1, 0, 0, 0 },
+ { 112, 98, 75, 43, 15, 1, 0, 0 },
+ { 120, 112, 98, 76, 45, 16, 2, 0 }
+};
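A quick check of the table against the formula above (column j corresponds to 2^j missed ticks): for idx i = 2 and n = 8 ticks, (1 - 1/2^2)^8 = (3/4)^8, which is approximately 0.1001; on the 128-point scale that is 128 * 0.1001, roughly 12.8, matching degrade_factor[2][3] = 12.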
/*
* Update cpu_load for any missed ticks, due to tickless idle. The backlog
@@ -4306,14 +4392,46 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
return load;
}
-/*
+/**
+ * __update_cpu_load - update the rq->cpu_load[] statistics
+ * @this_rq: The rq to update statistics for
+ * @this_load: The current load
+ * @pending_updates: The number of missed updates
+ * @active: !0 for NOHZ_FULL
+ *
* Update rq->cpu_load[] statistics. This function is usually called every
- * scheduler tick (TICK_NSEC). With tickless idle this will not be called
- * every tick. We fix it up based on jiffies.
+ * scheduler tick (TICK_NSEC).
+ *
+ * This function computes a decaying average:
+ *
+ * load[i]' = (1 - 1/2^i) * load[i] + (1/2^i) * load
+ *
+ * Because of NOHZ it might not get called on every tick, hence the
+ * @pending_updates argument.
+ *
+ * load[i]_n = (1 - 1/2^i) * load[i]_n-1 + (1/2^i) * load_n-1
+ * = A * load[i]_n-1 + B ; A := (1 - 1/2^i), B := (1/2^i) * load
+ * = A * (A * load[i]_n-2 + B) + B
+ * = A * (A * (A * load[i]_n-3 + B) + B) + B
+ * = A^3 * load[i]_n-3 + (A^2 + A + 1) * B
+ * = A^n * load[i]_0 + (A^(n-1) + A^(n-2) + ... + 1) * B
+ * = A^n * load[i]_0 + ((1 - A^n) / (1 - A)) * B
+ * = (1 - 1/2^i)^n * (load[i]_0 - load) + load
+ *
+ * In the above we've assumed load_n := load, which is true for NOHZ_FULL as
+ * any change in load would have resulted in the tick being turned back on.
+ *
+ * For regular NOHZ, this reduces to:
+ *
+ * load[i]_n = (1 - 1/2^i)^n * load[i]_0
+ *
+ * see decay_load_missed(). For NOHZ_FULL we get to subtract and add back
+ * the extra term. See the @active parameter.
*/
static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
- unsigned long pending_updates)
+ unsigned long pending_updates, int active)
{
+ unsigned long tickless_load = active ? this_rq->cpu_load[0] : 0;
int i, scale;
this_rq->nr_load_updates++;
@@ -4325,8 +4443,9 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
/* scale is effectively 1 << i now, and >> i divides by scale */
- old_load = this_rq->cpu_load[i];
+ old_load = this_rq->cpu_load[i] - tickless_load;
old_load = decay_load_missed(old_load, pending_updates - 1, i);
+ old_load += tickless_load;
new_load = this_load;
/*
* Round up the averaging division if load is increasing. This
@@ -4381,16 +4500,17 @@ static void update_idle_cpu_load(struct rq *this_rq)
pending_updates = curr_jiffies - this_rq->last_load_update_tick;
this_rq->last_load_update_tick = curr_jiffies;
- __update_cpu_load(this_rq, load, pending_updates);
+ __update_cpu_load(this_rq, load, pending_updates, 0);
}
/*
* Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
*/
-void update_cpu_load_nohz(void)
+void update_cpu_load_nohz(int active)
{
struct rq *this_rq = this_rq();
unsigned long curr_jiffies = READ_ONCE(jiffies);
+ unsigned long load = active ? weighted_cpuload(cpu_of(this_rq)) : 0;
unsigned long pending_updates;
if (curr_jiffies == this_rq->last_load_update_tick)
@@ -4401,10 +4521,11 @@ void update_cpu_load_nohz(void)
if (pending_updates) {
this_rq->last_load_update_tick = curr_jiffies;
/*
- * We were idle, this means load 0, the current load might be
- * !0 due to remote wakeups and the sort.
+ * In the regular NOHZ case, we were idle, which means a load of 0.
+ * In the NOHZ_FULL case, we were non-idle, so we should consider
+ * the CPU's weighted load.
*/
- __update_cpu_load(this_rq, 0, pending_updates);
+ __update_cpu_load(this_rq, load, pending_updates, active);
}
raw_spin_unlock(&this_rq->lock);
}
@@ -4420,7 +4541,7 @@ void update_cpu_load_active(struct rq *this_rq)
* See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
*/
this_rq->last_load_update_tick = jiffies;
- __update_cpu_load(this_rq, load, 1);
+ __update_cpu_load(this_rq, load, 1, 1);
}
/*
@@ -5007,8 +5128,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
/*
* Called immediately before a task is migrated to a new cpu; task_cpu(p) and
* cfs_rq_of(p) references at time of call are still valid and identify the
- * previous cpu. However, the caller only guarantees p->pi_lock is held; no
- * other assumptions, including the state of rq->lock, should be made.
+ * previous cpu. The caller guarantees p->pi_lock or task_rq(p)->lock is held.
*/
static void migrate_task_rq_fair(struct task_struct *p)
{
@@ -5721,8 +5841,8 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
{
lockdep_assert_held(&env->src_rq->lock);
- deactivate_task(env->src_rq, p, 0);
p->on_rq = TASK_ON_RQ_MIGRATING;
+ deactivate_task(env->src_rq, p, 0);
set_task_cpu(p, env->dst_cpu);
}
@@ -5855,8 +5975,8 @@ static void attach_task(struct rq *rq, struct task_struct *p)
lockdep_assert_held(&rq->lock);
BUG_ON(task_rq(p) != rq);
- p->on_rq = TASK_ON_RQ_QUEUED;
activate_task(rq, p, 0);
+ p->on_rq = TASK_ON_RQ_QUEUED;
check_preempt_curr(rq, p, 0);
}
@@ -6302,7 +6422,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
bool *overload)
{
unsigned long load;
- int i;
+ int i, nr_running;
memset(sgs, 0, sizeof(*sgs));
@@ -6319,7 +6439,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
sgs->group_util += cpu_util(i);
sgs->sum_nr_running += rq->cfs.h_nr_running;
- if (rq->nr_running > 1)
+ nr_running = rq->nr_running;
+ if (nr_running > 1)
*overload = true;
#ifdef CONFIG_NUMA_BALANCING
@@ -6327,7 +6448,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,
sgs->nr_preferred_running += rq->nr_preferred_running;
#endif
sgs->sum_weighted_load += weighted_cpuload(i);
- if (idle_cpu(i))
+ /*
+ * No need to call idle_cpu() if nr_running is not 0
+ */
+ if (!nr_running && idle_cpu(i))
sgs->idle_cpus++;
}
@@ -7248,8 +7372,6 @@ static int idle_balance(struct rq *this_rq)
int pulled_task = 0;
u64 curr_cost = 0;
- idle_enter_fair(this_rq);
-
/*
* We must set idle_stamp _before_ calling idle_balance(), such that we
* measure the duration of idle_balance() as idle time.
@@ -7330,10 +7452,8 @@ out:
if (this_rq->nr_running != this_rq->cfs.h_nr_running)
pulled_task = -1;
- if (pulled_task) {
- idle_exit_fair(this_rq);
+ if (pulled_task)
this_rq->idle_stamp = 0;
- }
return pulled_task;
}
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index c4ae0f1fdf9b..47ce94931f1b 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -47,7 +47,6 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
- idle_exit_fair(rq);
rq_last_tick_reset(rq);
}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index e3cc16312046..8ec86abe0ea1 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -64,7 +64,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
raw_spin_unlock(&rt_b->rt_runtime_lock);
}
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(HAVE_RT_PUSH_IPI)
static void push_irq_work_func(struct irq_work *work);
#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index efd3bfc7e347..10f16374df7f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -248,7 +248,12 @@ struct task_group {
unsigned long shares;
#ifdef CONFIG_SMP
- atomic_long_t load_avg;
+ /*
+ * load_avg can be heavily contended at clock tick time, so put
+ * it in its own cacheline separated from the fields above which
+ * will also be accessed at each tick.
+ */
+ atomic_long_t load_avg ____cacheline_aligned;
#endif
#endif
@@ -335,7 +340,15 @@ extern void sched_move_task(struct task_struct *tsk);
#ifdef CONFIG_FAIR_GROUP_SCHED
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
-#endif
+
+#ifdef CONFIG_SMP
+extern void set_task_rq_fair(struct sched_entity *se,
+ struct cfs_rq *prev, struct cfs_rq *next);
+#else /* !CONFIG_SMP */
+static inline void set_task_rq_fair(struct sched_entity *se,
+ struct cfs_rq *prev, struct cfs_rq *next) { }
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
#else /* CONFIG_CGROUP_SCHED */
@@ -933,6 +946,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
+ set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
p->se.cfs_rq = tg->cfs_rq[cpu];
p->se.parent = tg->se[cpu];
#endif
@@ -1073,7 +1087,10 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
* We must ensure this doesn't happen until the switch is completely
* finished.
*
- * Pairs with the control dependency and rmb in try_to_wake_up().
+ * In particular, the load of prev->state in finish_task_switch() must
+ * happen before this.
+ *
+ * Pairs with the smp_cond_acquire() in try_to_wake_up().
*/
smp_store_release(&prev->on_cpu, 0);
#endif
@@ -1110,46 +1127,8 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
#define WEIGHT_IDLEPRIO 3
#define WMULT_IDLEPRIO 1431655765
-/*
- * Nice levels are multiplicative, with a gentle 10% change for every
- * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
- * nice 1, it will get ~10% less CPU time than another CPU-bound task
- * that remained on nice 0.
- *
- * The "10% effect" is relative and cumulative: from _any_ nice level,
- * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
- * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
- * If a task goes up by ~10% and another task goes down by ~10% then
- * the relative distance between them is ~25%.)
- */
-static const int prio_to_weight[40] = {
- /* -20 */ 88761, 71755, 56483, 46273, 36291,
- /* -15 */ 29154, 23254, 18705, 14949, 11916,
- /* -10 */ 9548, 7620, 6100, 4904, 3906,
- /* -5 */ 3121, 2501, 1991, 1586, 1277,
- /* 0 */ 1024, 820, 655, 526, 423,
- /* 5 */ 335, 272, 215, 172, 137,
- /* 10 */ 110, 87, 70, 56, 45,
- /* 15 */ 36, 29, 23, 18, 15,
-};
-
-/*
- * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated.
- *
- * In cases where the weight does not change often, we can use the
- * precalculated inverse to speed up arithmetics by turning divisions
- * into multiplications:
- */
-static const u32 prio_to_wmult[40] = {
- /* -20 */ 48388, 59856, 76040, 92818, 118348,
- /* -15 */ 147320, 184698, 229616, 287308, 360437,
- /* -10 */ 449829, 563644, 704093, 875809, 1099582,
- /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326,
- /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587,
- /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126,
- /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
- /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
-};
+extern const int sched_prio_to_weight[40];
+extern const u32 sched_prio_to_wmult[40];
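A quick worked illustration of the ~10% rule these (now shared) tables encode, using two values from the table above, for two CPU-bound tasks, one at nice 0 (weight 1024) and one at nice 1 (weight 820):

	share(nice 0) = 1024 / (1024 + 820) ~= 55.5%
	share(nice 1) =  820 / (1024 + 820) ~= 44.5%

Each task moves roughly 10% away from the 50/50 baseline it would have at equal nice, and the weight ratio 1024/820 ~= 1.25 gives the ~25% relative distance between adjacent nice levels.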
#define ENQUEUE_WAKEUP 0x01
#define ENQUEUE_HEAD 0x02
@@ -1249,16 +1228,8 @@ extern void update_group_capacity(struct sched_domain *sd, int cpu);
extern void trigger_load_balance(struct rq *rq);
-extern void idle_enter_fair(struct rq *this_rq);
-extern void idle_exit_fair(struct rq *this_rq);
-
extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
-#else
-
-static inline void idle_enter_fair(struct rq *rq) { }
-static inline void idle_exit_fair(struct rq *rq) { }
-
#endif
#ifdef CONFIG_CPU_IDLE
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 052e02672d12..f15d6b6a538a 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -392,7 +392,7 @@ __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
do {
prepare_to_wait(wq, &q->wait, mode);
if (test_bit(q->key.bit_nr, q->key.flags))
- ret = (*action)(&q->key);
+ ret = (*action)(&q->key, mode);
} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
finish_wait(wq, &q->wait);
return ret;
@@ -431,7 +431,7 @@ __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
prepare_to_wait_exclusive(wq, &q->wait, mode);
if (!test_bit(q->key.bit_nr, q->key.flags))
continue;
- ret = action(&q->key);
+ ret = action(&q->key, mode);
if (!ret)
continue;
abort_exclusive_wait(wq, &q->wait, mode, &q->key);
@@ -581,44 +581,44 @@ void wake_up_atomic_t(atomic_t *p)
}
EXPORT_SYMBOL(wake_up_atomic_t);
-__sched int bit_wait(struct wait_bit_key *word)
+__sched int bit_wait(struct wait_bit_key *word, int mode)
{
- if (signal_pending_state(current->state, current))
- return 1;
schedule();
+ if (signal_pending_state(mode, current))
+ return -EINTR;
return 0;
}
EXPORT_SYMBOL(bit_wait);
-__sched int bit_wait_io(struct wait_bit_key *word)
+__sched int bit_wait_io(struct wait_bit_key *word, int mode)
{
- if (signal_pending_state(current->state, current))
- return 1;
io_schedule();
+ if (signal_pending_state(mode, current))
+ return -EINTR;
return 0;
}
EXPORT_SYMBOL(bit_wait_io);
-__sched int bit_wait_timeout(struct wait_bit_key *word)
+__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
{
unsigned long now = READ_ONCE(jiffies);
- if (signal_pending_state(current->state, current))
- return 1;
if (time_after_eq(now, word->timeout))
return -EAGAIN;
schedule_timeout(word->timeout - now);
+ if (signal_pending_state(mode, current))
+ return -EINTR;
return 0;
}
EXPORT_SYMBOL_GPL(bit_wait_timeout);
-__sched int bit_wait_io_timeout(struct wait_bit_key *word)
+__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
{
unsigned long now = READ_ONCE(jiffies);
- if (signal_pending_state(current->state, current))
- return 1;
if (time_after_eq(now, word->timeout))
return -EAGAIN;
io_schedule_timeout(word->timeout - now);
+ if (signal_pending_state(mode, current))
+ return -EINTR;
return 0;
}
EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
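For callers supplying their own action, a minimal sketch of the new contract (the name my_bit_wait and its use below are hypothetical, not part of this patch): sleep first, then test for signals against the caller's wait mode rather than current->state, which is already TASK_RUNNING once schedule() returns, and report -EINTR so a real errno propagates.

static __sched int my_bit_wait(struct wait_bit_key *word, int mode)
{
	schedule();
	/* mode is the TASK_* state the caller slept in */
	if (signal_pending_state(mode, current))
		return -EINTR;
	return 0;
}

/* e.g.: out_of_line_wait_on_bit(word, bit, my_bit_wait, TASK_INTERRUPTIBLE); */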
diff --git a/kernel/signal.c b/kernel/signal.c
index c0b01fe24bbd..f3f1f7a972fd 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3503,7 +3503,7 @@ SYSCALL_DEFINE0(pause)
#endif
-int sigsuspend(sigset_t *set)
+static int sigsuspend(sigset_t *set)
{
current->saved_sigmask = current->blocked;
set_current_blocked(set);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 867bc20e1ef1..edb6de4f5908 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -28,7 +28,6 @@
*/
struct cpu_stop_done {
atomic_t nr_todo; /* nr left to execute */
- bool executed; /* actually executed? */
int ret; /* collected return value */
struct completion completion; /* fired if nr_todo reaches 0 */
};
@@ -63,14 +62,10 @@ static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
}
/* signal completion unless @done is NULL */
-static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
+static void cpu_stop_signal_done(struct cpu_stop_done *done)
{
- if (done) {
- if (executed)
- done->executed = true;
- if (atomic_dec_and_test(&done->nr_todo))
- complete(&done->completion);
- }
+ if (atomic_dec_and_test(&done->nr_todo))
+ complete(&done->completion);
}
static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
@@ -81,17 +76,21 @@ static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
}
/* queue @work to @stopper. if offline, @work is completed immediately */
-static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
+static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
unsigned long flags;
+ bool enabled;
spin_lock_irqsave(&stopper->lock, flags);
- if (stopper->enabled)
+ enabled = stopper->enabled;
+ if (enabled)
__cpu_stop_queue_work(stopper, work);
- else
- cpu_stop_signal_done(work->done, false);
+ else if (work->done)
+ cpu_stop_signal_done(work->done);
spin_unlock_irqrestore(&stopper->lock, flags);
+
+ return enabled;
}
/**
@@ -124,9 +123,10 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };
cpu_stop_init_done(&done, 1);
- cpu_stop_queue_work(cpu, &work);
+ if (!cpu_stop_queue_work(cpu, &work))
+ return -ENOENT;
wait_for_completion(&done.completion);
- return done.executed ? done.ret : -ENOENT;
+ return done.ret;
}
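A caller-side sketch of the simplified error convention (some_stop_fn and example_stop are hypothetical): with cpu_stop_queue_work() now reporting whether the work was queued, -ENOENT is returned directly when the target CPU's stopper is disabled, replacing the old done.executed bookkeeping.

static int some_stop_fn(void *arg);	/* hypothetical callback */

static int example_stop(unsigned int cpu)
{
	int ret = stop_one_cpu(cpu, some_stop_fn, NULL);

	/*
	 * -ENOENT: the stopper was disabled and some_stop_fn() never ran;
	 * anything else is some_stop_fn()'s own return value.
	 */
	return ret;
}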
/* This controls the threads on each CPU. */
@@ -258,7 +258,6 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
struct cpu_stop_work work1, work2;
struct multi_stop_data msdata;
- preempt_disable();
msdata = (struct multi_stop_data){
.fn = fn,
.data = arg,
@@ -277,16 +276,11 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
if (cpu1 > cpu2)
swap(cpu1, cpu2);
- if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2)) {
- preempt_enable();
+ if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2))
return -ENOENT;
- }
-
- preempt_enable();
wait_for_completion(&done.completion);
-
- return done.executed ? done.ret : -ENOENT;
+ return done.ret;
}
/**
@@ -302,23 +296,28 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
*
* CONTEXT:
* Don't care.
+ *
+ * RETURNS:
+ * true if cpu_stop_work was queued successfully and @fn will be called,
+ * false otherwise.
*/
-void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
+bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
struct cpu_stop_work *work_buf)
{
*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
- cpu_stop_queue_work(cpu, work_buf);
+ return cpu_stop_queue_work(cpu, work_buf);
}
/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
-static void queue_stop_cpus_work(const struct cpumask *cpumask,
+static bool queue_stop_cpus_work(const struct cpumask *cpumask,
cpu_stop_fn_t fn, void *arg,
struct cpu_stop_done *done)
{
struct cpu_stop_work *work;
unsigned int cpu;
+ bool queued = false;
/*
* Disable preemption while queueing to avoid getting
@@ -331,9 +330,12 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
work->fn = fn;
work->arg = arg;
work->done = done;
- cpu_stop_queue_work(cpu, work);
+ if (cpu_stop_queue_work(cpu, work))
+ queued = true;
}
lg_global_unlock(&stop_cpus_lock);
+
+ return queued;
}
static int __stop_cpus(const struct cpumask *cpumask,
@@ -342,9 +344,10 @@ static int __stop_cpus(const struct cpumask *cpumask,
struct cpu_stop_done done;
cpu_stop_init_done(&done, cpumask_weight(cpumask));
- queue_stop_cpus_work(cpumask, fn, arg, &done);
+ if (!queue_stop_cpus_work(cpumask, fn, arg, &done))
+ return -ENOENT;
wait_for_completion(&done.completion);
- return done.executed ? done.ret : -ENOENT;
+ return done.ret;
}
/**
@@ -432,7 +435,6 @@ static void cpu_stopper_thread(unsigned int cpu)
{
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
struct cpu_stop_work *work;
- int ret;
repeat:
work = NULL;
@@ -448,23 +450,19 @@ repeat:
cpu_stop_fn_t fn = work->fn;
void *arg = work->arg;
struct cpu_stop_done *done = work->done;
- char ksym_buf[KSYM_NAME_LEN] __maybe_unused;
-
- /* cpu stop callbacks are not allowed to sleep */
- preempt_disable();
+ int ret;
+ /* cpu stop callbacks must not sleep, make in_atomic() == T */
+ preempt_count_inc();
ret = fn(arg);
- if (ret)
- done->ret = ret;
-
- /* restore preemption and check it's still balanced */
- preempt_enable();
+ if (done) {
+ if (ret)
+ done->ret = ret;
+ cpu_stop_signal_done(done);
+ }
+ preempt_count_dec();
WARN_ONCE(preempt_count(),
- "cpu_stop: %s(%p) leaked preempt count\n",
- kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
- ksym_buf), arg);
-
- cpu_stop_signal_done(done, true);
+ "cpu_stop: %pf(%p) leaked preempt count\n", fn, arg);
goto repeat;
}
}
@@ -531,7 +529,7 @@ static int __init cpu_stop_init(void)
}
early_initcall(cpu_stop_init);
-#ifdef CONFIG_STOP_MACHINE
+#if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU)
static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
@@ -631,4 +629,4 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
return ret ?: done.ret;
}
-#endif /* CONFIG_STOP_MACHINE */
+#endif /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 7fbba635a549..e840ed867a5d 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -271,11 +271,27 @@ static int alarmtimer_suspend(struct device *dev)
__pm_wakeup_event(ws, MSEC_PER_SEC);
return ret;
}
+
+static int alarmtimer_resume(struct device *dev)
+{
+ struct rtc_device *rtc;
+
+ rtc = alarmtimer_get_rtcdev();
+ if (rtc)
+ rtc_timer_cancel(rtc, &rtctimer);
+ return 0;
+}
+
#else
static int alarmtimer_suspend(struct device *dev)
{
return 0;
}
+
+static int alarmtimer_resume(struct device *dev)
+{
+ return 0;
+}
#endif
static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
@@ -800,6 +816,7 @@ out:
/* Suspend hook structures */
static const struct dev_pm_ops alarmtimer_pm_ops = {
.suspend = alarmtimer_suspend,
+ .resume = alarmtimer_resume,
};
static struct platform_driver alarmtimer_driver = {
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 1347882d131e..664de539299b 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -218,8 +218,8 @@ static void clocksource_watchdog(unsigned long data)
/* Check the deviation from the watchdog clocksource. */
if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
- pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n",
- cs->name);
+ pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
+ smp_processor_id(), cs->name);
pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
watchdog->name, wdnow, wdlast, watchdog->mask);
pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 149cc8086aea..36f2ca09aa5e 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -16,8 +16,11 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/rtc.h>
+#include <linux/math64.h>
#include "ntp_internal.h"
+#include "timekeeping_internal.h"
+
/*
* NTP timekeeping variables:
@@ -70,7 +73,7 @@ static long time_esterror = NTP_PHASE_LIMIT;
static s64 time_freq;
/* time at last adjustment (secs): */
-static long time_reftime;
+static time64_t time_reftime;
static long time_adjust;
@@ -297,25 +300,27 @@ static void ntp_update_offset(long offset)
if (!(time_status & STA_PLL))
return;
- if (!(time_status & STA_NANO))
+ if (!(time_status & STA_NANO)) {
+ /* Make sure the multiplication below won't overflow */
+ offset = clamp(offset, -USEC_PER_SEC, USEC_PER_SEC);
offset *= NSEC_PER_USEC;
+ }
/*
* Scale the phase adjustment and
* clamp to the operating range.
*/
- offset = min(offset, MAXPHASE);
- offset = max(offset, -MAXPHASE);
+ offset = clamp(offset, -MAXPHASE, MAXPHASE);
/*
* Select how the frequency is to be controlled
* and in which mode (PLL or FLL).
*/
- secs = get_seconds() - time_reftime;
+ secs = (long)(__ktime_get_real_seconds() - time_reftime);
if (unlikely(time_status & STA_FREQHOLD))
secs = 0;
- time_reftime = get_seconds();
+ time_reftime = __ktime_get_real_seconds();
offset64 = offset;
freq_adj = ntp_update_offset_fll(offset64, secs);
@@ -390,10 +395,11 @@ ktime_t ntp_get_next_leap(void)
*
* Also handles leap second processing, and returns leap offset
*/
-int second_overflow(unsigned long secs)
+int second_overflow(time64_t secs)
{
s64 delta;
int leap = 0;
+ s32 rem;
/*
* Leap second processing. If in leap-insert state at the end of the
@@ -404,19 +410,19 @@ int second_overflow(unsigned long secs)
case TIME_OK:
if (time_status & STA_INS) {
time_state = TIME_INS;
- ntp_next_leap_sec = secs + SECS_PER_DAY -
- (secs % SECS_PER_DAY);
+ div_s64_rem(secs, SECS_PER_DAY, &rem);
+ ntp_next_leap_sec = secs + SECS_PER_DAY - rem;
} else if (time_status & STA_DEL) {
time_state = TIME_DEL;
- ntp_next_leap_sec = secs + SECS_PER_DAY -
- ((secs+1) % SECS_PER_DAY);
+ div_s64_rem(secs + 1, SECS_PER_DAY, &rem);
+ ntp_next_leap_sec = secs + SECS_PER_DAY - rem;
}
break;
case TIME_INS:
if (!(time_status & STA_INS)) {
ntp_next_leap_sec = TIME64_MAX;
time_state = TIME_OK;
- } else if (secs % SECS_PER_DAY == 0) {
+ } else if (secs == ntp_next_leap_sec) {
leap = -1;
time_state = TIME_OOP;
printk(KERN_NOTICE
@@ -427,7 +433,7 @@ int second_overflow(unsigned long secs)
if (!(time_status & STA_DEL)) {
ntp_next_leap_sec = TIME64_MAX;
time_state = TIME_OK;
- } else if ((secs + 1) % SECS_PER_DAY == 0) {
+ } else if (secs == ntp_next_leap_sec) {
leap = 1;
ntp_next_leap_sec = TIME64_MAX;
time_state = TIME_WAIT;
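A rough worked example of the STA_INS branch above (values illustrative): with 64-bit seconds a plain '%' would pull in slow 64-bit modulo helpers on 32-bit targets, so div_s64_rem() from linux/math64.h computes the remainder in one step.

	s32 rem;
	time64_t secs = 1435708800 + 12345;	/* 12345s past 2015-07-01 00:00 UTC */

	div_s64_rem(secs, SECS_PER_DAY, &rem);		/* rem = 12345 */
	ntp_next_leap_sec = secs + SECS_PER_DAY - rem;	/* next UTC midnight */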
@@ -590,7 +596,7 @@ static inline void process_adj_status(struct timex *txc, struct timespec64 *ts)
* reference time to current time.
*/
if (!(time_status & STA_PLL) && (txc->status & STA_PLL))
- time_reftime = get_seconds();
+ time_reftime = __ktime_get_real_seconds();
/* only set allowed bits */
time_status &= STA_RONLY;
@@ -674,8 +680,14 @@ int ntp_validate_timex(struct timex *txc)
return -EINVAL;
}
- if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME)))
- return -EPERM;
+ if (txc->modes & ADJ_SETOFFSET) {
+ /* In order to inject time, you gotta be super-user! */
+ if (!capable(CAP_SYS_TIME))
+ return -EPERM;
+
+ if (!timeval_inject_offset_valid(&txc->time))
+ return -EINVAL;
+ }
/*
* Check for potential multiplication overflows that can
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index af924470eac0..d8a7c11fa71a 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -6,7 +6,7 @@ extern void ntp_clear(void);
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
extern u64 ntp_tick_length(void);
extern ktime_t ntp_get_next_leap(void);
-extern int second_overflow(unsigned long secs);
+extern int second_overflow(time64_t secs);
extern int ntp_validate_timex(struct timex *);
extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *);
extern void __hardpps(const struct timespec64 *, const struct timespec64 *);
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index ce033c7aa2e8..9cff0ab82b63 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -69,10 +69,10 @@ static ssize_t posix_clock_read(struct file *fp, char __user *buf,
static unsigned int posix_clock_poll(struct file *fp, poll_table *wait)
{
struct posix_clock *clk = get_posix_clock(fp);
- int result = 0;
+ unsigned int result = 0;
if (!clk)
- return -ENODEV;
+ return POLLERR;
if (clk->ops.poll)
result = clk->ops.poll(clk, fp, wait);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7c7ec4515983..9cc20af58c76 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -143,7 +143,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
* when we go busy again does not account too much ticks.
*/
if (ts->tick_stopped) {
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sched();
if (is_idle_task(current))
ts->idle_jiffies++;
}
@@ -430,7 +430,7 @@ static void tick_nohz_update_jiffies(ktime_t now)
tick_do_update_jiffies64(now);
local_irq_restore(flags);
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sched();
}
/*
@@ -603,15 +603,31 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
/*
* If the tick is due in the next period, keep it ticking or
- * restart it proper.
+ * forcibly prod the timer.
*/
delta = next_tick - basemono;
if (delta <= (u64)TICK_NSEC) {
tick.tv64 = 0;
+ /*
+ * We've not stopped the tick yet, and there's a timer in the
+ * next period, so no point in stopping it either, bail.
+ */
if (!ts->tick_stopped)
goto out;
+
+ /*
+ * If, OTOH, we did stop it, but there's a pending (expired)
+ * timer, reprogram the timer hardware to fire now.
+ *
+ * We will not restart the tick proper, just prod the timer
+ * hardware into firing an interrupt to process the pending
+ * timers. Just like tick_irq_exit() will not restart the tick
+ * for 'normal' interrupts.
+ *
+ * Only once we exit the idle loop will we re-enable the tick,
+ * see tick_nohz_idle_exit().
+ */
if (delta == 0) {
- /* Tick is stopped, but required now. Enforce it */
tick_nohz_restart(ts, now);
goto out;
}
@@ -694,14 +710,14 @@ out:
return tick;
}
-static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
+static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now, int active)
{
/* Update jiffies first */
tick_do_update_jiffies64(now);
- update_cpu_load_nohz();
+ update_cpu_load_nohz(active);
calc_load_exit_idle();
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sched();
/*
* Cancel the scheduled timer and restore the tick
*/
@@ -725,7 +741,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (can_stop_full_tick())
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped)
- tick_nohz_restart_sched_tick(ts, ktime_get());
+ tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
#endif
}
@@ -875,7 +891,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
unsigned long ticks;
- if (vtime_accounting_enabled())
+ if (vtime_accounting_cpu_enabled())
return;
/*
* We stopped the tick in idle. Update process times would miss the
@@ -916,7 +932,7 @@ void tick_nohz_idle_exit(void)
tick_nohz_stop_idle(ts, now);
if (ts->tick_stopped) {
- tick_nohz_restart_sched_tick(ts, now);
+ tick_nohz_restart_sched_tick(ts, now, 0);
tick_nohz_account_idle_ticks(ts);
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d563c1960302..34b4cedfa80d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -305,8 +305,7 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
delta = timekeeping_get_delta(tkr);
- nsec = delta * tkr->mult + tkr->xtime_nsec;
- nsec >>= tkr->shift;
+ nsec = (delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift;
/* If arch requires, add in get_arch_timeoffset() */
return nsec + arch_gettimeoffset();
@@ -846,6 +845,19 @@ time64_t ktime_get_real_seconds(void)
}
EXPORT_SYMBOL_GPL(ktime_get_real_seconds);
+/**
+ * __ktime_get_real_seconds - the same as ktime_get_real_seconds()
+ * but without the sequence counter protection. This internal function
+ * must only be called when the timekeeping lock is already held.
+ */
+time64_t __ktime_get_real_seconds(void)
+{
+ struct timekeeper *tk = &tk_core.timekeeper;
+
+ return tk->xtime_sec;
+}
+
+
#ifdef CONFIG_NTP_PPS
/**
@@ -959,7 +971,7 @@ int timekeeping_inject_offset(struct timespec *ts)
struct timespec64 ts64, tmp;
int ret = 0;
- if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
+ if (!timespec_inject_offset_valid(ts))
return -EINVAL;
ts64 = timespec_to_timespec64(*ts);
@@ -1592,9 +1604,12 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
{
s64 interval = tk->cycle_interval;
s64 xinterval = tk->xtime_interval;
+ u32 base = tk->tkr_mono.clock->mult;
+ u32 max = tk->tkr_mono.clock->maxadj;
+ u32 cur_adj = tk->tkr_mono.mult;
s64 tick_error;
bool negative;
- u32 adj;
+ u32 adj_scale;
/* Remove any current error adj from freq calculation */
if (tk->ntp_err_mult)
@@ -1613,13 +1628,33 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
/* preserve the direction of correction */
negative = (tick_error < 0);
- /* Sort out the magnitude of the correction */
+ /* If any adjustment would pass the max, just return */
+ if (negative && (cur_adj - 1) <= (base - max))
+ return;
+ if (!negative && (cur_adj + 1) >= (base + max))
+ return;
+ /*
+ * Sort out the magnitude of the correction, but
+ * avoid making so large a correction that we go
+ * over the max adjustment.
+ */
+ adj_scale = 0;
tick_error = abs(tick_error);
- for (adj = 0; tick_error > interval; adj++)
+ while (tick_error > interval) {
+ u32 adj = 1 << (adj_scale + 1);
+
+ /* Check if adjustment gets us within 1 unit from the max */
+ if (negative && (cur_adj - adj) <= (base - max))
+ break;
+ if (!negative && (cur_adj + adj) >= (base + max))
+ break;
+
+ adj_scale++;
tick_error >>= 1;
+ }
/* scale the corrections */
- timekeeping_apply_adjustment(tk, offset, negative, adj);
+ timekeeping_apply_adjustment(tk, offset, negative, adj_scale);
}
/*
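A standalone sketch of the magnitude selection above (userspace, illustrative values only, mirroring just the non-negative branch): the error is halved until it fits within one tick interval, and the resulting power-of-two step is capped so mult never crosses clock->mult +/- clock->maxadj. The kernel then applies the step via timekeeping_apply_adjustment(tk, offset, negative, adj_scale).

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int64_t tick_error = 1 << 20, interval = 1 << 10;
	uint32_t base = 1000000, max = 100, cur_adj = 1000000;
	uint32_t adj_scale = 0;

	while (tick_error > interval) {
		uint32_t adj = 1u << (adj_scale + 1);

		/* stop one unit short of the max adjustment */
		if (cur_adj + adj >= base + max)
			break;
		adj_scale++;
		tick_error >>= 1;
	}
	printf("step = 1 << %u = %u\n", adj_scale, 1u << adj_scale);
	return 0;
}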
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index 4ea005a7f9da..5be76270ec4a 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -17,7 +17,11 @@ static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
{
cycle_t ret = (now - last) & mask;
- return (s64) ret > 0 ? ret : 0;
+ /*
+ * Prevent time going backwards by checking the MSB of mask in
+ * the result. If set, return 0.
+ */
+ return ret & ~(mask >> 1) ? 0 : ret;
}
#else
static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
@@ -26,4 +30,6 @@ static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
}
#endif
+extern time64_t __ktime_get_real_seconds(void);
+
#endif /* _TIMEKEEPING_INTERNAL_H */
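A self-contained demonstration of the MSB-of-mask check above (userspace, illustrative): a delta whose top bit is set within the counter's mask is treated as a negative raced read and clamped to 0.

#include <stdint.h>
#include <assert.h>

typedef uint64_t cycle_t;

static cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
{
	cycle_t ret = (now - last) & mask;

	/* MSB of mask set in the result => time went backwards, return 0 */
	return ret & ~(mask >> 1) ? 0 : ret;
}

int main(void)
{
	cycle_t mask = 0xffffff;	/* 24-bit free-running counter */

	assert(clocksource_delta(100, 90, mask) == 10);
	/* raced read: 'now' appears behind 'last' */
	assert(clocksource_delta(90, 100, mask) == 0);
	return 0;
}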
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 75f1d05ea82d..9c6045a27ba3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1887,12 +1887,6 @@ rb_event_index(struct ring_buffer_event *event)
return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
}
-static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
-{
- cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
- cpu_buffer->reader_page->read = 0;
-}
-
static void rb_inc_iter(struct ring_buffer_iter *iter)
{
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
@@ -2803,8 +2797,11 @@ rb_reserve_next_event(struct ring_buffer *buffer,
event = __rb_reserve_next(cpu_buffer, &info);
- if (unlikely(PTR_ERR(event) == -EAGAIN))
+ if (unlikely(PTR_ERR(event) == -EAGAIN)) {
+ if (info.add_timestamp)
+ info.length -= RB_LEN_TIME_EXTEND;
goto again;
+ }
if (!event)
goto out_fail;
@@ -3626,7 +3623,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
/* Finally update the reader page to the new head */
cpu_buffer->reader_page = reader;
- rb_reset_reader_page(cpu_buffer);
+ cpu_buffer->reader_page->read = 0;
if (overwrite != cpu_buffer->last_overrun) {
cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
@@ -3636,6 +3633,10 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
goto again;
out:
+ /* Update the read_stamp on the first event */
+ if (reader && reader->read == 0)
+ cpu_buffer->read_stamp = reader->page->time_stamp;
+
arch_spin_unlock(&cpu_buffer->lock);
local_irq_restore(flags);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index abfc903e741e..cc9f7a9319be 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -1,7 +1,7 @@
/*
* trace event based perf event profiling/tracing
*
- * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra
* Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
*/
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 6bbc5f652355..4f6ef6912e00 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -582,6 +582,12 @@ static void __ftrace_clear_event_pids(struct trace_array *tr)
unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
+ unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
+ unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);
+
+ unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
+ unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
+
list_for_each_entry(file, &tr->events, list) {
clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
}
@@ -1729,6 +1735,16 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
tr, INT_MAX);
register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
tr, 0);
+
+ register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
+ tr, INT_MAX);
+ register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
+ tr, 0);
+
+ register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
+ tr, INT_MAX);
+ register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
+ tr, 0);
}
/*
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 1c2b28536feb..060df67dbdd1 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -273,6 +273,7 @@ static const char **find_next(void *v, loff_t *pos)
if (*pos < last_index + start_index)
return __start___tracepoint_str + (*pos - last_index);
+ start_index += last_index;
return find_next_mod_format(start_index, v, fmt, pos);
}
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 18f34cf75f74..b3ace6ebbba3 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -20,6 +20,7 @@
#include <linux/smpboot.h>
#include <linux/sched/rt.h>
#include <linux/tick.h>
+#include <linux/workqueue.h>
#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
@@ -225,7 +226,15 @@ static void __touch_watchdog(void)
__this_cpu_write(watchdog_touch_ts, get_timestamp());
}
-void touch_softlockup_watchdog(void)
+/**
+ * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
+ *
+ * Call when the scheduler may have stalled for legitimate reasons
+ * preventing the watchdog task from executing - e.g. the scheduler
+ * entering idle state. This should only be used for scheduler events.
+ * Use touch_softlockup_watchdog() for everything else.
+ */
+void touch_softlockup_watchdog_sched(void)
{
/*
* Preemption can be enabled. It doesn't matter which CPU's timestamp
@@ -233,6 +242,12 @@ void touch_softlockup_watchdog(void)
*/
raw_cpu_write(watchdog_touch_ts, 0);
}
+
+void touch_softlockup_watchdog(void)
+{
+ touch_softlockup_watchdog_sched();
+ wq_watchdog_touch(raw_smp_processor_id());
+}
EXPORT_SYMBOL(touch_softlockup_watchdog);
void touch_all_softlockup_watchdogs(void)
@@ -246,6 +261,7 @@ void touch_all_softlockup_watchdogs(void)
*/
for_each_watchdog_cpu(cpu)
per_cpu(watchdog_touch_ts, cpu) = 0;
+ wq_watchdog_touch(-1);
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR
@@ -351,7 +367,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
trigger_allbutself_cpu_backtrace();
if (hardlockup_panic)
- panic("Hard LOCKUP");
+ nmi_panic(regs, "Hard LOCKUP");
__this_cpu_write(hard_watchdog_warn, true);
return;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c579dbab2e36..61a0264e28f9 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -148,6 +148,8 @@ struct worker_pool {
int id; /* I: pool ID */
unsigned int flags; /* X: flags */
+ unsigned long watchdog_ts; /* L: watchdog timestamp */
+
struct list_head worklist; /* L: list of pending works */
int nr_workers; /* L: total number of workers */
@@ -1083,6 +1085,8 @@ static void pwq_activate_delayed_work(struct work_struct *work)
struct pool_workqueue *pwq = get_work_pwq(work);
trace_workqueue_activate_work(work);
+ if (list_empty(&pwq->pool->worklist))
+ pwq->pool->watchdog_ts = jiffies;
move_linked_works(work, &pwq->pool->worklist, NULL);
__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
pwq->nr_active++;
@@ -1385,6 +1389,8 @@ retry:
trace_workqueue_activate_work(work);
pwq->nr_active++;
worklist = &pwq->pool->worklist;
+ if (list_empty(worklist))
+ pwq->pool->watchdog_ts = jiffies;
} else {
work_flags |= WORK_STRUCT_DELAYED;
worklist = &pwq->delayed_works;
@@ -2157,6 +2163,8 @@ recheck:
list_first_entry(&pool->worklist,
struct work_struct, entry);
+ pool->watchdog_ts = jiffies;
+
if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
/* optimization path, not strictly necessary */
process_one_work(worker, work);
@@ -2240,6 +2248,7 @@ repeat:
struct pool_workqueue, mayday_node);
struct worker_pool *pool = pwq->pool;
struct work_struct *work, *n;
+ bool first = true;
__set_current_state(TASK_RUNNING);
list_del_init(&pwq->mayday_node);
@@ -2256,9 +2265,14 @@ repeat:
* process'em.
*/
WARN_ON_ONCE(!list_empty(scheduled));
- list_for_each_entry_safe(work, n, &pool->worklist, entry)
- if (get_work_pwq(work) == pwq)
+ list_for_each_entry_safe(work, n, &pool->worklist, entry) {
+ if (get_work_pwq(work) == pwq) {
+ if (first)
+ pool->watchdog_ts = jiffies;
move_linked_works(work, scheduled, &n);
+ }
+ first = false;
+ }
if (!list_empty(scheduled)) {
process_scheduled_works(rescuer);
@@ -2316,6 +2330,37 @@ repeat:
goto repeat;
}
+/**
+ * check_flush_dependency - check for flush dependency sanity
+ * @target_wq: workqueue being flushed
+ * @target_work: work item being flushed (NULL for workqueue flushes)
+ *
+ * %current is trying to flush the whole @target_wq or @target_work on it.
+ * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not
+ * reclaiming memory or running on a workqueue which doesn't have
+ * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to
+ * a deadlock.
+ */
+static void check_flush_dependency(struct workqueue_struct *target_wq,
+ struct work_struct *target_work)
+{
+ work_func_t target_func = target_work ? target_work->func : NULL;
+ struct worker *worker;
+
+ if (target_wq->flags & WQ_MEM_RECLAIM)
+ return;
+
+ worker = current_wq_worker();
+
+ WARN_ONCE(current->flags & PF_MEMALLOC,
+ "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
+ current->pid, current->comm, target_wq->name, target_func);
+ WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+ "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
+ worker->current_pwq->wq->name, worker->current_func,
+ target_wq->name, target_func);
+}
+
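A minimal sketch of the pattern these checks catch (all names hypothetical): a WQ_MEM_RECLAIM workqueue's work item flushing a plain workqueue. Under memory pressure the reclaim path may be waiting on this very worker while the plain workqueue cannot make forward progress without allocating memory, which is a deadlock.

static struct workqueue_struct *reclaim_wq; /* alloc_workqueue(..., WQ_MEM_RECLAIM, ...) */
static struct workqueue_struct *plain_wq;   /* alloc_workqueue(..., 0, ...) */

static void bad_work_fn(struct work_struct *work)
{
	/* runs on reclaim_wq; this flush trips the second WARN_ONCE above */
	flush_workqueue(plain_wq);
}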
struct wq_barrier {
struct work_struct work;
struct completion done;
@@ -2525,6 +2570,8 @@ void flush_workqueue(struct workqueue_struct *wq)
list_add_tail(&this_flusher.list, &wq->flusher_overflow);
}
+ check_flush_dependency(wq, NULL);
+
mutex_unlock(&wq->mutex);
wait_for_completion(&this_flusher.done);
@@ -2697,6 +2744,8 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
pwq = worker->current_pwq;
}
+ check_flush_dependency(pwq->wq, work);
+
insert_wq_barrier(pwq, barr, work, worker);
spin_unlock_irq(&pool->lock);
@@ -3069,6 +3118,7 @@ static int init_worker_pool(struct worker_pool *pool)
pool->cpu = -1;
pool->node = NUMA_NO_NODE;
pool->flags |= POOL_DISASSOCIATED;
+ pool->watchdog_ts = jiffies;
INIT_LIST_HEAD(&pool->worklist);
INIT_LIST_HEAD(&pool->idle_list);
hash_init(pool->busy_hash);
@@ -3601,7 +3651,6 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
const struct workqueue_attrs *attrs)
{
struct apply_wqattrs_ctx *ctx;
- int ret = -ENOMEM;
/* only unbound workqueues can change attributes */
if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
@@ -3612,16 +3661,14 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
return -EINVAL;
ctx = apply_wqattrs_prepare(wq, attrs);
+ if (!ctx)
+ return -ENOMEM;
/* the ctx has been prepared successfully, let's commit it */
- if (ctx) {
- apply_wqattrs_commit(ctx);
- ret = 0;
- }
-
+ apply_wqattrs_commit(ctx);
apply_wqattrs_cleanup(ctx);
- return ret;
+ return 0;
}
/**
@@ -4308,7 +4355,9 @@ void show_workqueue_state(void)
pr_info("pool %d:", pool->id);
pr_cont_pool_info(pool);
- pr_cont(" workers=%d", pool->nr_workers);
+ pr_cont(" hung=%us workers=%d",
+ jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
+ pool->nr_workers);
if (pool->manager)
pr_cont(" manager: %d",
task_pid_nr(pool->manager->task));
@@ -5167,6 +5216,154 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
#endif /* CONFIG_SYSFS */
+/*
+ * Workqueue watchdog.
+ *
+ * A stall may be caused by various bugs - a missing WQ_MEM_RECLAIM, an
+ * illegal flush dependency, or a concurrency-managed work item that stays
+ * RUNNING indefinitely. Workqueue stalls can be very difficult to debug as the
+ * usual warning mechanisms don't trigger and internal workqueue state is
+ * largely opaque.
+ *
+ * Workqueue watchdog monitors all worker pools periodically and dumps
+ * state if some pools failed to make forward progress for a while where
+ * forward progress is defined as the first item on ->worklist changing.
+ *
+ * This mechanism is controlled through the kernel parameter
+ * "workqueue.watchdog_thresh" which can be updated at runtime through the
+ * corresponding sysfs parameter file.
+ */
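In practice (a usage sketch; the sysfs path follows from the module_param_cb() below, assuming the usual built-in module naming): with CONFIG_WQ_WATCHDOG=y the threshold can be set in seconds at boot via "workqueue.watchdog_thresh=<seconds>" or changed at runtime, with 0 disabling the watchdog:

	# echo 60 > /sys/module/workqueue/parameters/watchdog_thresh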
+#ifdef CONFIG_WQ_WATCHDOG
+
+static void wq_watchdog_timer_fn(unsigned long data);
+
+static unsigned long wq_watchdog_thresh = 30;
+static struct timer_list wq_watchdog_timer =
+ TIMER_DEFERRED_INITIALIZER(wq_watchdog_timer_fn, 0, 0);
+
+static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
+static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
+
+static void wq_watchdog_reset_touched(void)
+{
+ int cpu;
+
+ wq_watchdog_touched = jiffies;
+ for_each_possible_cpu(cpu)
+ per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
+}
+
+static void wq_watchdog_timer_fn(unsigned long data)
+{
+ unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
+ bool lockup_detected = false;
+ struct worker_pool *pool;
+ int pi;
+
+ if (!thresh)
+ return;
+
+ rcu_read_lock();
+
+ for_each_pool(pool, pi) {
+ unsigned long pool_ts, touched, ts;
+
+ if (list_empty(&pool->worklist))
+ continue;
+
+ /* get the latest of pool and touched timestamps */
+ pool_ts = READ_ONCE(pool->watchdog_ts);
+ touched = READ_ONCE(wq_watchdog_touched);
+
+ if (time_after(pool_ts, touched))
+ ts = pool_ts;
+ else
+ ts = touched;
+
+ if (pool->cpu >= 0) {
+ unsigned long cpu_touched =
+ READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
+ pool->cpu));
+ if (time_after(cpu_touched, ts))
+ ts = cpu_touched;
+ }
+
+ /* did we stall? */
+ if (time_after(jiffies, ts + thresh)) {
+ lockup_detected = true;
+ pr_emerg("BUG: workqueue lockup - pool");
+ pr_cont_pool_info(pool);
+ pr_cont(" stuck for %us!\n",
+ jiffies_to_msecs(jiffies - pool_ts) / 1000);
+ }
+ }
+
+ rcu_read_unlock();
+
+ if (lockup_detected)
+ show_workqueue_state();
+
+ wq_watchdog_reset_touched();
+ mod_timer(&wq_watchdog_timer, jiffies + thresh);
+}
+
+void wq_watchdog_touch(int cpu)
+{
+ if (cpu >= 0)
+ per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
+ else
+ wq_watchdog_touched = jiffies;
+}
+
+static void wq_watchdog_set_thresh(unsigned long thresh)
+{
+ wq_watchdog_thresh = 0;
+ del_timer_sync(&wq_watchdog_timer);
+
+ if (thresh) {
+ wq_watchdog_thresh = thresh;
+ wq_watchdog_reset_touched();
+ mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
+ }
+}
+
+static int wq_watchdog_param_set_thresh(const char *val,
+ const struct kernel_param *kp)
+{
+ unsigned long thresh;
+ int ret;
+
+ ret = kstrtoul(val, 0, &thresh);
+ if (ret)
+ return ret;
+
+ if (system_wq)
+ wq_watchdog_set_thresh(thresh);
+ else
+ wq_watchdog_thresh = thresh;
+
+ return 0;
+}
+
+static const struct kernel_param_ops wq_watchdog_thresh_ops = {
+ .set = wq_watchdog_param_set_thresh,
+ .get = param_get_ulong,
+};
+
+module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
+ 0644);
+
+static void wq_watchdog_init(void)
+{
+ wq_watchdog_set_thresh(wq_watchdog_thresh);
+}
+
+#else /* CONFIG_WQ_WATCHDOG */
+
+static inline void wq_watchdog_init(void) { }
+
+#endif /* CONFIG_WQ_WATCHDOG */
+
static void __init wq_numa_init(void)
{
cpumask_var_t *tbl;
@@ -5290,6 +5487,9 @@ static int __init init_workqueues(void)
!system_unbound_wq || !system_freezable_wq ||
!system_power_efficient_wq ||
!system_freezable_power_efficient_wq);
+
+ wq_watchdog_init();
+
return 0;
}
early_initcall(init_workqueues);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8c15b29d5adc..3048bf5b729a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -812,6 +812,17 @@ config BOOTPARAM_HUNG_TASK_PANIC_VALUE
default 0 if !BOOTPARAM_HUNG_TASK_PANIC
default 1 if BOOTPARAM_HUNG_TASK_PANIC
+config WQ_WATCHDOG
+ bool "Detect Workqueue Stalls"
+ depends on DEBUG_KERNEL
+ help
+ Say Y here to enable stall detection on workqueues. If a
+ worker pool doesn't make forward progress on a pending work
+ item for over a given amount of time, 30s by default, a
+ warning message is printed along with a dump of the workqueue
+ state. This can be configured through the kernel parameter
+ "workqueue.watchdog_thresh" and its sysfs counterpart.
+
endmenu # "Debug lockups and hangs"
config PANIC_ON_OOPS
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 83c33a5bcffb..d62de8bf022d 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -16,6 +16,10 @@
#include <linux/kernel.h>
#include <linux/atomic.h>
+#ifdef CONFIG_X86
+#include <asm/processor.h> /* for boot_cpu_has below */
+#endif
+
#define TEST(bit, op, c_op, val) \
do { \
atomic##bit##_set(&v, v0); \
@@ -27,6 +31,65 @@ do { \
(unsigned long long)r); \
} while (0)
+/*
+ * Test for an atomic operation family,
+ * @test should be a macro accepting parameters (bit, op, ...)
+ */
+
+#define FAMILY_TEST(test, bit, op, args...) \
+do { \
+ test(bit, op, ##args); \
+ test(bit, op##_acquire, ##args); \
+ test(bit, op##_release, ##args); \
+ test(bit, op##_relaxed, ##args); \
+} while (0)
+
+#define TEST_RETURN(bit, op, c_op, val) \
+do { \
+ atomic##bit##_set(&v, v0); \
+ r = v0; \
+ r c_op val; \
+ BUG_ON(atomic##bit##_##op(val, &v) != r); \
+ BUG_ON(atomic##bit##_read(&v) != r); \
+} while (0)
+
+#define RETURN_FAMILY_TEST(bit, op, c_op, val) \
+do { \
+ FAMILY_TEST(TEST_RETURN, bit, op, c_op, val); \
+} while (0)
+
+#define TEST_ARGS(bit, op, init, ret, expect, args...) \
+do { \
+ atomic##bit##_set(&v, init); \
+ BUG_ON(atomic##bit##_##op(&v, ##args) != ret); \
+ BUG_ON(atomic##bit##_read(&v) != expect); \
+} while (0)
+
+#define XCHG_FAMILY_TEST(bit, init, new) \
+do { \
+ FAMILY_TEST(TEST_ARGS, bit, xchg, init, init, new, new); \
+} while (0)
+
+#define CMPXCHG_FAMILY_TEST(bit, init, new, wrong) \
+do { \
+ FAMILY_TEST(TEST_ARGS, bit, cmpxchg, \
+ init, init, new, init, new); \
+ FAMILY_TEST(TEST_ARGS, bit, cmpxchg, \
+ init, init, init, wrong, new); \
+} while (0)
+
+#define INC_RETURN_FAMILY_TEST(bit, i) \
+do { \
+ FAMILY_TEST(TEST_ARGS, bit, inc_return, \
+ i, (i) + one, (i) + one); \
+} while (0)
+
+#define DEC_RETURN_FAMILY_TEST(bit, i) \
+do { \
+ FAMILY_TEST(TEST_ARGS, bit, dec_return, \
+ i, (i) - one, (i) - one); \
+} while (0)
+
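To make the macro layering concrete, one invocation from test_atomic64() below expands as follows:

/*
 * RETURN_FAMILY_TEST(64, add_return, +=, onestwos) goes through
 * FAMILY_TEST(TEST_RETURN, 64, add_return, +=, onestwos) and generates:
 *
 *   TEST_RETURN(64, add_return,         +=, onestwos);
 *   TEST_RETURN(64, add_return_acquire, +=, onestwos);
 *   TEST_RETURN(64, add_return_release, +=, onestwos);
 *   TEST_RETURN(64, add_return_relaxed, +=, onestwos);
 *
 * i.e. one BUG_ON pair per memory-ordering variant of the operation.
 */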
static __init void test_atomic(void)
{
int v0 = 0xaaa31337;
@@ -45,6 +108,18 @@ static __init void test_atomic(void)
TEST(, and, &=, v1);
TEST(, xor, ^=, v1);
TEST(, andnot, &= ~, v1);
+
+ RETURN_FAMILY_TEST(, add_return, +=, onestwos);
+ RETURN_FAMILY_TEST(, add_return, +=, -one);
+ RETURN_FAMILY_TEST(, sub_return, -=, onestwos);
+ RETURN_FAMILY_TEST(, sub_return, -=, -one);
+
+ INC_RETURN_FAMILY_TEST(, v0);
+ DEC_RETURN_FAMILY_TEST(, v0);
+
+ XCHG_FAMILY_TEST(, v0, v1);
+ CMPXCHG_FAMILY_TEST(, v0, v1, onestwos);
+
}
#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0)
@@ -74,25 +149,10 @@ static __init void test_atomic64(void)
TEST(64, xor, ^=, v1);
TEST(64, andnot, &= ~, v1);
- INIT(v0);
- r += onestwos;
- BUG_ON(atomic64_add_return(onestwos, &v) != r);
- BUG_ON(v.counter != r);
-
- INIT(v0);
- r += -one;
- BUG_ON(atomic64_add_return(-one, &v) != r);
- BUG_ON(v.counter != r);
-
- INIT(v0);
- r -= onestwos;
- BUG_ON(atomic64_sub_return(onestwos, &v) != r);
- BUG_ON(v.counter != r);
-
- INIT(v0);
- r -= -one;
- BUG_ON(atomic64_sub_return(-one, &v) != r);
- BUG_ON(v.counter != r);
+ RETURN_FAMILY_TEST(64, add_return, +=, onestwos);
+ RETURN_FAMILY_TEST(64, add_return, +=, -one);
+ RETURN_FAMILY_TEST(64, sub_return, -=, onestwos);
+ RETURN_FAMILY_TEST(64, sub_return, -=, -one);
INIT(v0);
atomic64_inc(&v);
@@ -100,33 +160,15 @@ static __init void test_atomic64(void)
BUG_ON(v.counter != r);
INIT(v0);
- r += one;
- BUG_ON(atomic64_inc_return(&v) != r);
- BUG_ON(v.counter != r);
-
- INIT(v0);
atomic64_dec(&v);
r -= one;
BUG_ON(v.counter != r);
- INIT(v0);
- r -= one;
- BUG_ON(atomic64_dec_return(&v) != r);
- BUG_ON(v.counter != r);
+ INC_RETURN_FAMILY_TEST(64, v0);
+ DEC_RETURN_FAMILY_TEST(64, v0);
- INIT(v0);
- BUG_ON(atomic64_xchg(&v, v1) != v0);
- r = v1;
- BUG_ON(v.counter != r);
-
- INIT(v0);
- BUG_ON(atomic64_cmpxchg(&v, v0, v1) != v0);
- r = v1;
- BUG_ON(v.counter != r);
-
- INIT(v0);
- BUG_ON(atomic64_cmpxchg(&v, v2, v1) != v0);
- BUG_ON(v.counter != r);
+ XCHG_FAMILY_TEST(64, v0, v1);
+ CMPXCHG_FAMILY_TEST(64, v0, v1, v2);
INIT(v0);
BUG_ON(atomic64_add_unless(&v, one, v0));
diff --git a/lib/btree.c b/lib/btree.c
index 4264871ea1a0..f93a945274af 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -5,7 +5,7 @@
*
* Copyright (c) 2007-2008 Joern Engel <joern@logfs.org>
* Bits and pieces stolen from Peter Zijlstra's code, which is
- * Copyright 2007, Red Hat Inc. Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright 2007, Red Hat Inc. Peter Zijlstra
* GPLv2
*
* see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 8855f019ebe8..d34bd24c2c84 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -1464,7 +1464,7 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
entry->type = dma_debug_coherent;
entry->dev = dev;
entry->pfn = page_to_pfn(virt_to_page(virt));
- entry->offset = (size_t) virt & PAGE_MASK;
+ entry->offset = (size_t) virt & ~PAGE_MASK;
entry->size = size;
entry->dev_addr = dma_addr;
entry->direction = DMA_BIDIRECTIONAL;
@@ -1480,7 +1480,7 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
.type = dma_debug_coherent,
.dev = dev,
.pfn = page_to_pfn(virt_to_page(virt)),
- .offset = (size_t) virt & PAGE_MASK,
+ .offset = (size_t) virt & ~PAGE_MASK,
.dev_addr = addr,
.size = size,
.direction = DMA_BIDIRECTIONAL,
diff --git a/lib/list_debug.c b/lib/list_debug.c
index c24c2f7e296f..3859bf63561c 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -37,7 +37,7 @@ void __list_add(struct list_head *new,
next->prev = new;
new->next = next;
new->prev = prev;
- prev->next = new;
+ WRITE_ONCE(prev->next, new);
}
EXPORT_SYMBOL(__list_add);
diff --git a/lib/proportions.c b/lib/proportions.c
index 6f724298f67a..efa54f259ea9 100644
--- a/lib/proportions.c
+++ b/lib/proportions.c
@@ -1,7 +1,7 @@
/*
* Floating proportions
*
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
*
* Description:
*
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index a54ff8949f91..51282f579760 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -389,33 +389,31 @@ static bool rhashtable_check_elasticity(struct rhashtable *ht,
return false;
}
-int rhashtable_insert_rehash(struct rhashtable *ht)
+int rhashtable_insert_rehash(struct rhashtable *ht,
+ struct bucket_table *tbl)
{
struct bucket_table *old_tbl;
struct bucket_table *new_tbl;
- struct bucket_table *tbl;
unsigned int size;
int err;
old_tbl = rht_dereference_rcu(ht->tbl, ht);
- tbl = rhashtable_last_table(ht, old_tbl);
size = tbl->size;
+ err = -EBUSY;
+
if (rht_grow_above_75(ht, tbl))
size *= 2;
/* Do not schedule more than one rehash */
else if (old_tbl != tbl)
- return -EBUSY;
+ goto fail;
+
+ err = -ENOMEM;
new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
- if (new_tbl == NULL) {
- /* Schedule async resize/rehash to try allocation
- * non-atomic context.
- */
- schedule_work(&ht->run_work);
- return -ENOMEM;
- }
+ if (new_tbl == NULL)
+ goto fail;
err = rhashtable_rehash_attach(ht, tbl, new_tbl);
if (err) {
@@ -426,12 +424,24 @@ int rhashtable_insert_rehash(struct rhashtable *ht)
schedule_work(&ht->run_work);
return err;
+
+fail:
+ /* Do not fail the insert if someone else did a rehash. */
+ if (likely(rcu_dereference_raw(tbl->future_tbl)))
+ return 0;
+
+ /* Schedule async rehash to retry allocation in process context. */
+ if (err == -ENOMEM)
+ schedule_work(&ht->run_work);
+
+ return err;
}
EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
-int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
- struct rhash_head *obj,
- struct bucket_table *tbl)
+struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
+ const void *key,
+ struct rhash_head *obj,
+ struct bucket_table *tbl)
{
struct rhash_head *head;
unsigned int hash;
@@ -467,7 +477,12 @@ int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
exit:
spin_unlock(rht_bucket_lock(tbl, hash));
- return err;
+ if (err == 0)
+ return NULL;
+ else if (err == -EAGAIN)
+ return tbl;
+ else
+ return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
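A hypothetical caller sketch for the new tristate return: NULL means the insert succeeded, getting a table pointer back means -EAGAIN (retry against the next table), and anything else is an ERR_PTR() to unwrap.

static int example_insert(struct rhashtable *ht, const void *key,
			  struct rhash_head *obj, struct bucket_table *tbl)
{
	tbl = rhashtable_insert_slow(ht, key, obj, tbl);
	if (!tbl)
		return 0;		/* inserted */
	if (IS_ERR(tbl))
		return PTR_ERR(tbl);	/* hard failure */
	return -EAGAIN;			/* rehash in progress, retry */
}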
@@ -503,10 +518,11 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
if (!iter->walker)
return -ENOMEM;
- mutex_lock(&ht->mutex);
- iter->walker->tbl = rht_dereference(ht->tbl, ht);
+ spin_lock(&ht->lock);
+ iter->walker->tbl =
+ rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
list_add(&iter->walker->list, &iter->walker->tbl->walkers);
- mutex_unlock(&ht->mutex);
+ spin_unlock(&ht->lock);
return 0;
}
@@ -520,10 +536,10 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
*/
void rhashtable_walk_exit(struct rhashtable_iter *iter)
{
- mutex_lock(&iter->ht->mutex);
+ spin_lock(&iter->ht->lock);
if (iter->walker->tbl)
list_del(&iter->walker->list);
- mutex_unlock(&iter->ht->mutex);
+ spin_unlock(&iter->ht->lock);
kfree(iter->walker);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
@@ -547,14 +563,12 @@ int rhashtable_walk_start(struct rhashtable_iter *iter)
{
struct rhashtable *ht = iter->ht;
- mutex_lock(&ht->mutex);
+ rcu_read_lock();
+ spin_lock(&ht->lock);
if (iter->walker->tbl)
list_del(&iter->walker->list);
-
- rcu_read_lock();
-
- mutex_unlock(&ht->mutex);
+ spin_unlock(&ht->lock);
if (!iter->walker->tbl) {
iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
@@ -723,9 +737,6 @@ int rhashtable_init(struct rhashtable *ht,
if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
return -EINVAL;
- if (params->nelem_hint)
- size = rounded_hashtable_size(params);
-
memset(ht, 0, sizeof(*ht));
mutex_init(&ht->mutex);
spin_lock_init(&ht->lock);
@@ -745,6 +756,9 @@ int rhashtable_init(struct rhashtable *ht,
ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
+ if (params->nelem_hint)
+ size = rounded_hashtable_size(&ht->p);
+
/* The maximum (not average) chain length grows with the
* size of the hash table, at a rate of (log N)/(log log N).
* The value of 16 is selected so that even if the hash
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 8ed2ffd963c5..7340353f8aea 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -957,8 +957,9 @@ EXPORT_SYMBOL(congestion_wait);
* jiffies for either a BDI to exit congestion of the given @sync queue
* or a write to complete.
*
- * In the absence of zone congestion, cond_resched() is called to yield
- * the processor if necessary but otherwise does not sleep.
+ * In the absence of zone congestion, a short sleep or a cond_resched is
+ * performed to yield the processor and to allow other subsystems to make
+ * forward progress.
*
* The return value is 0 if the sleep is for the full timeout. Otherwise,
* it is the number of jiffies that were still remaining when the function
@@ -978,7 +979,19 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
*/
if (atomic_read(&nr_wb_congested[sync]) == 0 ||
!test_bit(ZONE_CONGESTED, &zone->flags)) {
- cond_resched();
+
+ /*
+ * Memory allocation/reclaim might be called from a WQ
+ * context and the current implementation of the WQ
+ * concurrency control doesn't recognize that a particular
+ * WQ is congested if the worker thread is looping without
+ * ever sleeping. Therefore we have to do a short sleep
+ * here rather than calling cond_resched().
+ */
+ if (current->flags & PF_WQ_WORKER)
+ schedule_timeout(1);
+ else
+ cond_resched();
/* In case we scheduled, work out time remaining */
ret = timeout - (jiffies - start);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 3b6380784c28..91e32bc8517f 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -33,6 +33,7 @@ EXPORT_SYMBOL(contig_page_data);
unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
+unsigned long long max_possible_pfn;
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c29ddebc8705..62fe06bb7d04 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2009,7 +2009,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
/*
* Be somewhat over-protective like KSM for now!
*/
- if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
+ if (*vm_flags & VM_NO_THP)
return -EINVAL;
*vm_flags &= ~VM_NOHUGEPAGE;
*vm_flags |= VM_HUGEPAGE;
@@ -2025,7 +2025,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
/*
* Be somewhat over-protective like KSM for now!
*/
- if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP))
+ if (*vm_flags & VM_NO_THP)
return -EINVAL;
*vm_flags &= ~VM_HUGEPAGE;
*vm_flags |= VM_NOHUGEPAGE;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 827bb02a43a4..ef6963b577fd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -372,8 +372,10 @@ retry_locked:
spin_unlock(&resv->lock);
trg = kmalloc(sizeof(*trg), GFP_KERNEL);
- if (!trg)
+ if (!trg) {
+ kfree(nrg);
return -ENOMEM;
+ }
spin_lock(&resv->lock);
list_add(&trg->link, &resv->region_cache);
@@ -483,8 +485,16 @@ static long region_del(struct resv_map *resv, long f, long t)
retry:
spin_lock(&resv->lock);
list_for_each_entry_safe(rg, trg, head, link) {
- if (rg->to <= f)
+ /*
+ * Skip regions before the range to be deleted. file_region
+ * ranges are normally of the form [from, to). However, there
+ * may be a "placeholder" entry in the map which is of the form
+ * (from, to) with from == to. Check for placeholder entries
+ * at the beginning of the range to be deleted.
+ */
+ if (rg->to <= f && (rg->to != rg->from || rg->to != f))
continue;
+
if (rg->from >= t)
break;
@@ -1886,7 +1896,10 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
page = __alloc_buddy_huge_page_with_mpol(h, vma, addr);
if (!page)
goto out_uncharge_cgroup;
-
+ if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
+ SetPagePrivate(page);
+ h->resv_huge_pages--;
+ }
spin_lock(&hugetlb_lock);
list_move(&page->lru, &h->hugepage_activelist);
/* Fall through */
@@ -3693,12 +3706,12 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
return VM_FAULT_HWPOISON_LARGE |
VM_FAULT_SET_HINDEX(hstate_index(h));
+ } else {
+ ptep = huge_pte_alloc(mm, address, huge_page_size(h));
+ if (!ptep)
+ return VM_FAULT_OOM;
}
- ptep = huge_pte_alloc(mm, address, huge_page_size(h));
- if (!ptep)
- return VM_FAULT_OOM;
-
mapping = vma->vm_file->f_mapping;
idx = vma_hugecache_offset(h, vma, address);
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index d41b21bce6a0..bc0a8d8b8f42 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -19,6 +19,7 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/kmemleak.h>
#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/mm.h>
@@ -444,6 +445,7 @@ int kasan_module_alloc(void *addr, size_t size)
if (ret) {
find_vm_area(addr)->flags |= VM_KASAN;
+ kmemleak_ignore(ret);
return 0;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9acfb165eb52..fc10620967c7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -903,14 +903,20 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
if (prev && reclaim->generation != iter->generation)
goto out_unlock;
- do {
+ while (1) {
pos = READ_ONCE(iter->position);
+ if (!pos || css_tryget(&pos->css))
+ break;
/*
- * A racing update may change the position and
- * put the last reference, hence css_tryget(),
- * or retry to see the updated position.
+ * css reference reached zero, so iter->position will
+ * be cleared by ->css_released. However, we should not
+ * rely on this happening soon, because ->css_released
+ * is called from a work queue, and by busy-waiting we
+ * might block it. So we clear iter->position right
+ * away.
*/
- } while (pos && !css_tryget(&pos->css));
+ (void)cmpxchg(&iter->position, pos, NULL);
+ }
}
if (pos)
@@ -956,17 +962,13 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
}
if (reclaim) {
- if (cmpxchg(&iter->position, pos, memcg) == pos) {
- if (memcg)
- css_get(&memcg->css);
- if (pos)
- css_put(&pos->css);
- }
-
/*
- * pairs with css_tryget when dereferencing iter->position
- * above.
+ * The position could have already been updated by a competing
+ * thread, so check that the value hasn't changed since we read
+ * it to avoid reclaiming from the same cgroup twice.
*/
+ (void)cmpxchg(&iter->position, pos, memcg);
+
if (pos)
css_put(&pos->css);
@@ -999,6 +1001,28 @@ void mem_cgroup_iter_break(struct mem_cgroup *root,
css_put(&prev->css);
}
+static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
+{
+ struct mem_cgroup *memcg = dead_memcg;
+ struct mem_cgroup_reclaim_iter *iter;
+ struct mem_cgroup_per_zone *mz;
+ int nid, zid;
+ int i;
+
+ while ((memcg = parent_mem_cgroup(memcg))) {
+ for_each_node(nid) {
+ for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+ mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
+ for (i = 0; i <= DEF_PRIORITY; i++) {
+ iter = &mz->iter[i];
+ cmpxchg(&iter->position,
+ dead_memcg, NULL);
+ }
+ }
+ }
+ }
+}
+
/*
* Iteration constructs for visiting all cgroups (under a tree). If
* loops are exited prematurely (break), mem_cgroup_iter_break() must
@@ -2128,7 +2152,7 @@ done_restock:
*/
do {
if (page_counter_read(&memcg->memory) > memcg->high) {
- current->memcg_nr_pages_over_high += nr_pages;
+ current->memcg_nr_pages_over_high += batch;
set_notify_resume(current);
break;
}
@@ -4324,6 +4348,13 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
wb_memcg_offline(memcg);
}
+static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+ invalidate_reclaim_iterators(memcg);
+}
+
static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
@@ -4779,23 +4810,18 @@ static void mem_cgroup_clear_mc(void)
spin_unlock(&mc.lock);
}
-static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
{
- struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ struct cgroup_subsys_state *css;
+ struct mem_cgroup *memcg;
struct mem_cgroup *from;
struct task_struct *leader, *p;
struct mm_struct *mm;
unsigned long move_flags;
int ret = 0;
- /*
- * We are now commited to this value whatever it is. Changes in this
- * tunable will only affect upcoming migrations, not the current one.
- * So we need to save it, and keep it going.
- */
- move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
- if (!move_flags)
+ /* charge immigration isn't supported on the default hierarchy */
+ if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
return 0;
/*
@@ -4805,13 +4831,23 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
* multiple.
*/
p = NULL;
- cgroup_taskset_for_each_leader(leader, tset) {
+ cgroup_taskset_for_each_leader(leader, css, tset) {
WARN_ON_ONCE(p);
p = leader;
+ memcg = mem_cgroup_from_css(css);
}
if (!p)
return 0;
+ /*
+ * We are now committed to this value whatever it is. Changes in this
+ * tunable will only affect upcoming migrations, not the current one.
+ * So we need to save it, and keep it going.
+ */
+ move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
+ if (!move_flags)
+ return 0;
+
from = mem_cgroup_from_task(p);
VM_BUG_ON(from == memcg);
@@ -4842,8 +4878,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
return ret;
}
-static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
{
if (mc.to)
mem_cgroup_clear_mc();
@@ -4985,10 +5020,10 @@ retry:
atomic_dec(&mc.from->moving_account);
}
-static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(struct cgroup_taskset *tset)
{
- struct task_struct *p = cgroup_taskset_first(tset);
+ struct cgroup_subsys_state *css;
+ struct task_struct *p = cgroup_taskset_first(tset, &css);
struct mm_struct *mm = get_task_mm(p);
if (mm) {
@@ -5000,17 +5035,14 @@ static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
mem_cgroup_clear_mc();
}
#else /* !CONFIG_MMU */
-static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
{
return 0;
}
-static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
{
}
-static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(struct cgroup_taskset *tset)
{
}
#endif
@@ -5184,6 +5216,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
.css_alloc = mem_cgroup_css_alloc,
.css_online = mem_cgroup_css_online,
.css_offline = mem_cgroup_css_offline,
+ .css_released = mem_cgroup_css_released,
.css_free = mem_cgroup_css_free,
.css_reset = mem_cgroup_css_reset,
.can_attach = mem_cgroup_can_attach,
@@ -5511,11 +5544,11 @@ void mem_cgroup_uncharge_list(struct list_head *page_list)
* mem_cgroup_replace_page - migrate a charge to another page
* @oldpage: currently charged page
* @newpage: page to transfer the charge to
- * @lrucare: either or both pages might be on the LRU already
*
* Migrate the charge from @oldpage to @newpage.
*
* Both pages must be locked, @newpage->mapping must be set up.
+ * Either or both pages might be on the LRU already.
*/
void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
{
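The two mem_cgroup_iter() hunks above implement a single protocol: readers pin the cached position with css_tryget() and clear the shared cursor themselves when the css is already dying, while writers advance the cursor only with a compare-and-swap against the value they read, so a racing update is never overwritten. An illustrative userspace model of the advance step, with C11 atomics standing in for cmpxchg():

#include <stdatomic.h>
#include <stdio.h>

static _Atomic(int *) cursor;	/* stand-in for iter->position */

/* mirrors (void)cmpxchg(&iter->position, pos, next): only move the
 * cursor if it still holds the value this thread observed */
static void advance(int *seen, int *next)
{
	atomic_compare_exchange_strong(&cursor, &seen, next);
}

int main(void)
{
	static int a, b, c;

	atomic_store(&cursor, &a);
	advance(&a, &b);	/* wins: cursor was still &a */
	advance(&a, &c);	/* loses: cursor is &b, stays untouched */
	printf("%s\n", atomic_load(&cursor) == &b ? "b" : "lost");
	return 0;
}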
diff --git a/mm/memory.c b/mm/memory.c
index deb679c31f2a..c387430f06c3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3015,9 +3015,9 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
} else {
/*
* The fault handler has no page to lock, so it holds
- * i_mmap_lock for write to protect against truncate.
+ * i_mmap_lock for read to protect against truncate.
*/
- i_mmap_unlock_write(vma->vm_file->f_mapping);
+ i_mmap_unlock_read(vma->vm_file->f_mapping);
}
goto uncharge_out;
}
@@ -3031,9 +3031,9 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
} else {
/*
* The fault handler has no page to lock, so it holds
- * i_mmap_lock for write to protect against truncate.
+ * i_mmap_lock for read to protect against truncate.
*/
- i_mmap_unlock_write(vma->vm_file->f_mapping);
+ i_mmap_unlock_read(vma->vm_file->f_mapping);
}
return ret;
uncharge_out:
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 67d488ab495e..a042a9d537bb 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1375,23 +1375,30 @@ int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
*/
int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
{
- unsigned long pfn;
+ unsigned long pfn, sec_end_pfn;
struct zone *zone = NULL;
struct page *page;
int i;
- for (pfn = start_pfn;
+ for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn);
pfn < end_pfn;
- pfn += MAX_ORDER_NR_PAGES) {
- i = 0;
- /* This is just a CONFIG_HOLES_IN_ZONE check.*/
- while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i))
- i++;
- if (i == MAX_ORDER_NR_PAGES)
+ pfn = sec_end_pfn + 1, sec_end_pfn += PAGES_PER_SECTION) {
+ /* Make sure the memory section is present first */
+ if (!present_section_nr(pfn_to_section_nr(pfn)))
continue;
- page = pfn_to_page(pfn + i);
- if (zone && page_zone(page) != zone)
- return 0;
- zone = page_zone(page);
+ for (; pfn < sec_end_pfn && pfn < end_pfn;
+ pfn += MAX_ORDER_NR_PAGES) {
+ i = 0;
+ /* This is just a CONFIG_HOLES_IN_ZONE check.*/
+ while ((i < MAX_ORDER_NR_PAGES) &&
+ !pfn_valid_within(pfn + i))
+ i++;
+ if (i == MAX_ORDER_NR_PAGES)
+ continue;
+ page = pfn_to_page(pfn + i);
+ if (zone && page_zone(page) != zone)
+ return 0;
+ zone = page_zone(page);
+ }
}
return 1;
}
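The rewritten test_pages_in_a_zone() walk above has two levels: step through the range section by section, skip sections that are not present, and only probe present sections in MAX_ORDER strides. A simplified userspace model with toy constants (the kernel's `pfn = sec_end_pfn + 1` outer step is reduced to `pfn = sec_end` here):

#include <stdio.h>

#define PAGES_PER_SECTION	8	/* made-up toy values */
#define MAX_ORDER_NR_PAGES	2

static int section_present(unsigned long pfn)
{
	return (pfn / PAGES_PER_SECTION) != 1; /* pretend section 1 is absent */
}

int main(void)
{
	unsigned long pfn, sec_end, end_pfn = 24;

	for (pfn = 0, sec_end = PAGES_PER_SECTION; pfn < end_pfn;
	     pfn = sec_end, sec_end += PAGES_PER_SECTION) {
		if (!section_present(pfn))
			continue;	/* skip the whole absent section */
		for (; pfn < sec_end && pfn < end_pfn;
		     pfn += MAX_ORDER_NR_PAGES)
			printf("probe pfn %lu\n", pfn);
	}
	return 0;
}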
diff --git a/mm/mremap.c b/mm/mremap.c
index c25bc6268e46..de824e72c3e8 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -319,6 +319,10 @@ static unsigned long move_vma(struct vm_area_struct *vma,
hiwater_vm = mm->hiwater_vm;
vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
+ /* Tell the pfn tracking code that mappings have moved from this vma */
+ if (unlikely(vma->vm_flags & VM_PFNMAP))
+ untrack_pfn_moved(vma);
+
if (do_munmap(mm, old_addr, old_len) < 0) {
/* OOM: unable to split vma, just get accounts right */
vm_unacct_memory(excess >> PAGE_SHIFT);
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index e57cf24babd6..99feb2b07fc5 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -31,6 +31,7 @@ EXPORT_SYMBOL(contig_page_data);
unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
+unsigned long long max_possible_pfn;
static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
u64 goal, u64 limit)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index d13a33918fa2..c12680993ff3 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -608,6 +608,8 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
continue;
if (unlikely(p->flags & PF_KTHREAD))
continue;
+ if (is_global_init(p))
+ continue;
if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
continue;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2c90357c34ea..d15d88c8efa1 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2,7 +2,7 @@
* mm/page-writeback.c
*
* Copyright (C) 2002, Linus Torvalds.
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
*
* Contains functions related to writing back dirty pages at the
* address_space level.
@@ -1542,7 +1542,9 @@ static void balance_dirty_pages(struct address_space *mapping,
for (;;) {
unsigned long now = jiffies;
unsigned long dirty, thresh, bg_thresh;
- unsigned long m_dirty, m_thresh, m_bg_thresh;
+ unsigned long m_dirty = 0; /* stop bogus uninit warnings */
+ unsigned long m_thresh = 0;
+ unsigned long m_bg_thresh = 0;
/*
* Unstable writes are a feature of certain networked
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 17a3c66639a9..9d666df5ef95 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3647,8 +3647,9 @@ static void show_migration_types(unsigned char type)
{
static const char types[MIGRATE_TYPES] = {
[MIGRATE_UNMOVABLE] = 'U',
- [MIGRATE_RECLAIMABLE] = 'E',
[MIGRATE_MOVABLE] = 'M',
+ [MIGRATE_RECLAIMABLE] = 'E',
+ [MIGRATE_HIGHATOMIC] = 'H',
#ifdef CONFIG_CMA
[MIGRATE_CMA] = 'C',
#endif
diff --git a/mm/shmem.c b/mm/shmem.c
index 9187eee4128b..5813b7fa85b6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -843,14 +843,14 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
list_add_tail(&info->swaplist, &shmem_swaplist);
if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
- swap_shmem_alloc(swap);
- shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
-
spin_lock(&info->lock);
- info->swapped++;
shmem_recalc_inode(inode);
+ info->swapped++;
spin_unlock(&info->lock);
+ swap_shmem_alloc(swap);
+ shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
+
mutex_unlock(&shmem_swaplist_mutex);
BUG_ON(page_mapped(page));
swap_writepage(page, wbc);
@@ -1078,7 +1078,7 @@ repeat:
if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
error = -EINVAL;
- goto failed;
+ goto unlock;
}
if (page && sgp == SGP_WRITE)
@@ -1246,11 +1246,15 @@ clear:
/* Perhaps the file has been truncated since we checked */
if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
+ if (alloced) {
+ ClearPageDirty(page);
+ delete_from_page_cache(page);
+ spin_lock(&info->lock);
+ shmem_recalc_inode(inode);
+ spin_unlock(&info->lock);
+ }
error = -EINVAL;
- if (alloced)
- goto trunc;
- else
- goto failed;
+ goto unlock;
}
*pagep = page;
return 0;
@@ -1258,23 +1262,13 @@ clear:
/*
* Error recovery.
*/
-trunc:
- info = SHMEM_I(inode);
- ClearPageDirty(page);
- delete_from_page_cache(page);
- spin_lock(&info->lock);
- info->alloced--;
- inode->i_blocks -= BLOCKS_PER_PAGE;
- spin_unlock(&info->lock);
decused:
- sbinfo = SHMEM_SB(inode->i_sb);
if (sbinfo->max_blocks)
percpu_counter_add(&sbinfo->used_blocks, -1);
unacct:
shmem_unacct_blocks(info->flags, 1);
failed:
- if (swap.val && error != -EINVAL &&
- !shmem_confirm_swap(mapping, index, swap))
+ if (swap.val && !shmem_confirm_swap(mapping, index, swap))
error = -EEXIST;
unlock:
if (page) {
@@ -2444,7 +2438,6 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
int len;
struct inode *inode;
struct page *page;
- char *kaddr;
struct shmem_inode_info *info;
len = strlen(symname) + 1;
@@ -2483,9 +2476,8 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
}
inode->i_mapping->a_ops = &shmem_aops;
inode->i_op = &shmem_symlink_inode_operations;
- kaddr = kmap_atomic(page);
- memcpy(kaddr, symname, len);
- kunmap_atomic(kaddr);
+ inode_nohighmem(inode);
+ memcpy(page_address(page), symname, len);
SetPageUptodate(page);
set_page_dirty(page);
unlock_page(page);
@@ -2498,23 +2490,34 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
return 0;
}
-static const char *shmem_follow_link(struct dentry *dentry, void **cookie)
+static void shmem_put_link(void *arg)
{
- struct page *page = NULL;
- int error = shmem_getpage(d_inode(dentry), 0, &page, SGP_READ, NULL);
- if (error)
- return ERR_PTR(error);
- unlock_page(page);
- *cookie = page;
- return kmap(page);
+ mark_page_accessed(arg);
+ put_page(arg);
}
-static void shmem_put_link(struct inode *unused, void *cookie)
+static const char *shmem_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct page *page = cookie;
- kunmap(page);
- mark_page_accessed(page);
- page_cache_release(page);
+ struct page *page = NULL;
+ int error;
+ if (!dentry) {
+ page = find_get_page(inode->i_mapping, 0);
+ if (!page)
+ return ERR_PTR(-ECHILD);
+ if (!PageUptodate(page)) {
+ put_page(page);
+ return ERR_PTR(-ECHILD);
+ }
+ } else {
+ error = shmem_getpage(inode, 0, &page, SGP_READ, NULL);
+ if (error)
+ return ERR_PTR(error);
+ unlock_page(page);
+ }
+ set_delayed_call(done, shmem_put_link, page);
+ return page_address(page);
}
#ifdef CONFIG_TMPFS_XATTR
@@ -2561,122 +2564,74 @@ static int shmem_initxattrs(struct inode *inode,
return 0;
}
-static const struct xattr_handler *shmem_xattr_handlers[] = {
-#ifdef CONFIG_TMPFS_POSIX_ACL
- &posix_acl_access_xattr_handler,
- &posix_acl_default_xattr_handler,
-#endif
- NULL
-};
-
-static int shmem_xattr_validate(const char *name)
-{
- struct { const char *prefix; size_t len; } arr[] = {
- { XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN },
- { XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN }
- };
- int i;
-
- for (i = 0; i < ARRAY_SIZE(arr); i++) {
- size_t preflen = arr[i].len;
- if (strncmp(name, arr[i].prefix, preflen) == 0) {
- if (!name[preflen])
- return -EINVAL;
- return 0;
- }
- }
- return -EOPNOTSUPP;
-}
-
-static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size)
+static int shmem_xattr_handler_get(const struct xattr_handler *handler,
+ struct dentry *dentry, const char *name,
+ void *buffer, size_t size)
{
struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
- int err;
-
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_getxattr(dentry, name, buffer, size);
-
- err = shmem_xattr_validate(name);
- if (err)
- return err;
+ name = xattr_full_name(handler, name);
return simple_xattr_get(&info->xattrs, name, buffer, size);
}
-static int shmem_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags)
+static int shmem_xattr_handler_set(const struct xattr_handler *handler,
+ struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
{
struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
- int err;
-
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_setxattr(dentry, name, value, size, flags);
-
- err = shmem_xattr_validate(name);
- if (err)
- return err;
+ name = xattr_full_name(handler, name);
return simple_xattr_set(&info->xattrs, name, value, size, flags);
}
-static int shmem_removexattr(struct dentry *dentry, const char *name)
-{
- struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
- int err;
-
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_removexattr(dentry, name);
+static const struct xattr_handler shmem_security_xattr_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .get = shmem_xattr_handler_get,
+ .set = shmem_xattr_handler_set,
+};
- err = shmem_xattr_validate(name);
- if (err)
- return err;
+static const struct xattr_handler shmem_trusted_xattr_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .get = shmem_xattr_handler_get,
+ .set = shmem_xattr_handler_set,
+};
- return simple_xattr_remove(&info->xattrs, name);
-}
+static const struct xattr_handler *shmem_xattr_handlers[] = {
+#ifdef CONFIG_TMPFS_POSIX_ACL
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+#endif
+ &shmem_security_xattr_handler,
+ &shmem_trusted_xattr_handler,
+ NULL
+};
static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
- return simple_xattr_list(&info->xattrs, buffer, size);
+ return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size);
}
#endif /* CONFIG_TMPFS_XATTR */
static const struct inode_operations shmem_short_symlink_operations = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
#ifdef CONFIG_TMPFS_XATTR
- .setxattr = shmem_setxattr,
- .getxattr = shmem_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = shmem_listxattr,
- .removexattr = shmem_removexattr,
+ .removexattr = generic_removexattr,
#endif
};
static const struct inode_operations shmem_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = shmem_follow_link,
- .put_link = shmem_put_link,
+ .get_link = shmem_get_link,
#ifdef CONFIG_TMPFS_XATTR
- .setxattr = shmem_setxattr,
- .getxattr = shmem_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = shmem_listxattr,
- .removexattr = shmem_removexattr,
+ .removexattr = generic_removexattr,
#endif
};
@@ -3148,10 +3103,10 @@ static const struct inode_operations shmem_inode_operations = {
.getattr = shmem_getattr,
.setattr = shmem_setattr,
#ifdef CONFIG_TMPFS_XATTR
- .setxattr = shmem_setxattr,
- .getxattr = shmem_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = shmem_listxattr,
- .removexattr = shmem_removexattr,
+ .removexattr = generic_removexattr,
.set_acl = simple_set_acl,
#endif
};
@@ -3170,10 +3125,10 @@ static const struct inode_operations shmem_dir_inode_operations = {
.tmpfile = shmem_tmpfile,
#endif
#ifdef CONFIG_TMPFS_XATTR
- .setxattr = shmem_setxattr,
- .getxattr = shmem_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = shmem_listxattr,
- .removexattr = shmem_removexattr,
+ .removexattr = generic_removexattr,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
.setattr = shmem_setattr,
@@ -3183,10 +3138,10 @@ static const struct inode_operations shmem_dir_inode_operations = {
static const struct inode_operations shmem_special_inode_operations = {
#ifdef CONFIG_TMPFS_XATTR
- .setxattr = shmem_setxattr,
- .getxattr = shmem_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = shmem_listxattr,
- .removexattr = shmem_removexattr,
+ .removexattr = generic_removexattr,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
.setattr = shmem_setattr,
diff --git a/mm/slab.c b/mm/slab.c
index e0819fa96559..4765c97ce690 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3419,7 +3419,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
-bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
{
return __kmem_cache_alloc_bulk(s, flags, size, p);
diff --git a/mm/slab.h b/mm/slab.h
index 27492eb678f7..7b6087197997 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -170,7 +170,7 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer,
* may be allocated or freed using these operations.
*/
void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
-bool __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
+int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
#ifdef CONFIG_MEMCG_KMEM
/*
diff --git a/mm/slab_common.c b/mm/slab_common.c
index d88e97c10a2e..3c6a86b4ec25 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -112,7 +112,7 @@ void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
kmem_cache_free(s, p[i]);
}
-bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
+int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
void **p)
{
size_t i;
@@ -121,10 +121,10 @@ bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
void *x = p[i] = kmem_cache_alloc(s, flags);
if (!x) {
__kmem_cache_free_bulk(s, i, p);
- return false;
+ return 0;
}
}
- return true;
+ return i;
}
#ifdef CONFIG_MEMCG_KMEM
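The bool-to-int conversion in the bulk-allocation hunks above changes the contract: the allocator now returns how many objects it produced, with 0 meaning total failure after unwinding any partial work. An illustrative userspace model of that contract:

#include <stdio.h>
#include <stdlib.h>

/* returns the number of objects allocated, 0 on failure; on failure
 * everything already allocated is freed, mirroring the generic
 * __kmem_cache_alloc_bulk() fallback */
static int alloc_bulk(size_t nr, void **p)
{
	size_t i;

	for (i = 0; i < nr; i++) {
		p[i] = malloc(32);
		if (!p[i]) {
			while (i--)
				free(p[i]);
			return 0;
		}
	}
	return (int)i;
}

int main(void)
{
	void *objs[4];
	int got = alloc_bulk(4, objs);

	printf("allocated %d objects\n", got);
	while (got--)
		free(objs[got]);
	return 0;
}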
diff --git a/mm/slob.c b/mm/slob.c
index 0d7e5df74d1f..17e8f8cc7c53 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -617,7 +617,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
-bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
{
return __kmem_cache_alloc_bulk(s, flags, size, p);
diff --git a/mm/slub.c b/mm/slub.c
index 7cb4bf9ae320..46997517406e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1065,11 +1065,15 @@ bad:
return 0;
}
+/* Supports checking bulk free of a constructed freelist */
static noinline struct kmem_cache_node *free_debug_processing(
- struct kmem_cache *s, struct page *page, void *object,
+ struct kmem_cache *s, struct page *page,
+ void *head, void *tail, int bulk_cnt,
unsigned long addr, unsigned long *flags)
{
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+ void *object = head;
+ int cnt = 0;
spin_lock_irqsave(&n->list_lock, *flags);
slab_lock(page);
@@ -1077,6 +1081,9 @@ static noinline struct kmem_cache_node *free_debug_processing(
if (!check_slab(s, page))
goto fail;
+next_object:
+ cnt++;
+
if (!check_valid_pointer(s, page, object)) {
slab_err(s, page, "Invalid object pointer 0x%p", object);
goto fail;
@@ -1107,8 +1114,19 @@ static noinline struct kmem_cache_node *free_debug_processing(
if (s->flags & SLAB_STORE_USER)
set_track(s, object, TRACK_FREE, addr);
trace(s, page, object, 0);
+ /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
init_object(s, object, SLUB_RED_INACTIVE);
+
+ /* Reached end of constructed freelist yet? */
+ if (object != tail) {
+ object = get_freepointer(s, object);
+ goto next_object;
+ }
out:
+ if (cnt != bulk_cnt)
+ slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
+ bulk_cnt, cnt);
+
slab_unlock(page);
/*
* Keep node_lock to preserve integrity
@@ -1204,7 +1222,7 @@ unsigned long kmem_cache_flags(unsigned long object_size,
return flags;
}
-#else
+#else /* !CONFIG_SLUB_DEBUG */
static inline void setup_object_debug(struct kmem_cache *s,
struct page *page, void *object) {}
@@ -1212,7 +1230,8 @@ static inline int alloc_debug_processing(struct kmem_cache *s,
struct page *page, void *object, unsigned long addr) { return 0; }
static inline struct kmem_cache_node *free_debug_processing(
- struct kmem_cache *s, struct page *page, void *object,
+ struct kmem_cache *s, struct page *page,
+ void *head, void *tail, int bulk_cnt,
unsigned long addr, unsigned long *flags) { return NULL; }
static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
@@ -1273,14 +1292,21 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
return memcg_kmem_get_cache(s, flags);
}
-static inline void slab_post_alloc_hook(struct kmem_cache *s,
- gfp_t flags, void *object)
+static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
+ size_t size, void **p)
{
+ size_t i;
+
flags &= gfp_allowed_mask;
- kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
- kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
+ for (i = 0; i < size; i++) {
+ void *object = p[i];
+
+ kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
+ kmemleak_alloc_recursive(object, s->object_size, 1,
+ s->flags, flags);
+ kasan_slab_alloc(s, object);
+ }
memcg_kmem_put_cache(s);
- kasan_slab_alloc(s, object);
}
static inline void slab_free_hook(struct kmem_cache *s, void *x)
@@ -1308,6 +1334,29 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
kasan_slab_free(s, x);
}
+static inline void slab_free_freelist_hook(struct kmem_cache *s,
+ void *head, void *tail)
+{
+/*
+ * The compiler cannot detect that this function can be removed if
+ * slab_free_hook() evaluates to nothing. Thus, catch all relevant
+ * config debug options here.
+ */
+#if defined(CONFIG_KMEMCHECK) || \
+ defined(CONFIG_LOCKDEP) || \
+ defined(CONFIG_DEBUG_KMEMLEAK) || \
+ defined(CONFIG_DEBUG_OBJECTS_FREE) || \
+ defined(CONFIG_KASAN)
+
+ void *object = head;
+ void *tail_obj = tail ? : head;
+
+ do {
+ slab_free_hook(s, object);
+ } while ((object != tail_obj) &&
+ (object = get_freepointer(s, object)));
+#endif
+}
+
static void setup_object(struct kmem_cache *s, struct page *page,
void *object)
{
@@ -2295,23 +2344,15 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
* And if we were unable to get a new slab from the partial slab lists then
* we need to allocate a new slab. This is the slowest path since it involves
* a call to the page allocator and the setup of a new slab.
+ *
+ * Version of __slab_alloc to use when we know that interrupts are
+ * already disabled (which is the case for bulk allocation).
*/
-static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long addr, struct kmem_cache_cpu *c)
{
void *freelist;
struct page *page;
- unsigned long flags;
-
- local_irq_save(flags);
-#ifdef CONFIG_PREEMPT
- /*
- * We may have been preempted and rescheduled on a different
- * cpu before disabling interrupts. Need to reload cpu area
- * pointer.
- */
- c = this_cpu_ptr(s->cpu_slab);
-#endif
page = c->page;
if (!page)
@@ -2369,7 +2410,6 @@ load_freelist:
VM_BUG_ON(!c->page->frozen);
c->freelist = get_freepointer(s, freelist);
c->tid = next_tid(c->tid);
- local_irq_restore(flags);
return freelist;
new_slab:
@@ -2386,7 +2426,6 @@ new_slab:
if (unlikely(!freelist)) {
slab_out_of_memory(s, gfpflags, node);
- local_irq_restore(flags);
return NULL;
}
@@ -2402,11 +2441,35 @@ new_slab:
deactivate_slab(s, page, get_freepointer(s, freelist));
c->page = NULL;
c->freelist = NULL;
- local_irq_restore(flags);
return freelist;
}
/*
+ * Another variant that disables interrupts and compensates for possible
+ * cpu changes by refetching the per cpu area pointer.
+ */
+static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ unsigned long addr, struct kmem_cache_cpu *c)
+{
+ void *p;
+ unsigned long flags;
+
+ local_irq_save(flags);
+#ifdef CONFIG_PREEMPT
+ /*
+ * We may have been preempted and rescheduled on a different
+ * cpu before disabling interrupts. Need to reload cpu area
+ * pointer.
+ */
+ c = this_cpu_ptr(s->cpu_slab);
+#endif
+
+ p = ___slab_alloc(s, gfpflags, node, addr, c);
+ local_irq_restore(flags);
+ return p;
+}
+
+/*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call
* overhead for requests that can be satisfied on the fastpath.
@@ -2419,7 +2482,7 @@ new_slab:
static __always_inline void *slab_alloc_node(struct kmem_cache *s,
gfp_t gfpflags, int node, unsigned long addr)
{
- void **object;
+ void *object;
struct kmem_cache_cpu *c;
struct page *page;
unsigned long tid;
@@ -2498,7 +2561,7 @@ redo:
if (unlikely(gfpflags & __GFP_ZERO) && object)
memset(object, 0, s->object_size);
- slab_post_alloc_hook(s, gfpflags, object);
+ slab_post_alloc_hook(s, gfpflags, 1, &object);
return object;
}
@@ -2569,10 +2632,11 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
* handling required then we can return immediately.
*/
static void __slab_free(struct kmem_cache *s, struct page *page,
- void *x, unsigned long addr)
+ void *head, void *tail, int cnt,
+ unsigned long addr)
+
{
void *prior;
- void **object = (void *)x;
int was_frozen;
struct page new;
unsigned long counters;
@@ -2582,7 +2646,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
stat(s, FREE_SLOWPATH);
if (kmem_cache_debug(s) &&
- !(n = free_debug_processing(s, page, x, addr, &flags)))
+ !(n = free_debug_processing(s, page, head, tail, cnt,
+ addr, &flags)))
return;
do {
@@ -2592,10 +2657,10 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
}
prior = page->freelist;
counters = page->counters;
- set_freepointer(s, object, prior);
+ set_freepointer(s, tail, prior);
new.counters = counters;
was_frozen = new.frozen;
- new.inuse--;
+ new.inuse -= cnt;
if ((!new.inuse || !prior) && !was_frozen) {
if (kmem_cache_has_cpu_partial(s) && !prior) {
@@ -2626,7 +2691,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
} while (!cmpxchg_double_slab(s, page,
prior, counters,
- object, new.counters,
+ head, new.counters,
"__slab_free"));
if (likely(!n)) {
@@ -2691,15 +2756,20 @@ slab_empty:
*
* If fastpath is not possible then fall back to __slab_free where we deal
* with all sorts of special processing.
+ *
+ * Bulk free of a freelist with several objects (all pointing to the
+ * same page) is possible by specifying head and tail pointers, plus
+ * the object count (cnt). A bulk free is indicated by the tail
+ * pointer being set.
*/
-static __always_inline void slab_free(struct kmem_cache *s,
- struct page *page, void *x, unsigned long addr)
+static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
+ void *head, void *tail, int cnt,
+ unsigned long addr)
{
- void **object = (void *)x;
+ void *tail_obj = tail ? : head;
struct kmem_cache_cpu *c;
unsigned long tid;
- slab_free_hook(s, x);
+ slab_free_freelist_hook(s, head, tail);
redo:
/*
@@ -2718,19 +2788,19 @@ redo:
barrier();
if (likely(page == c->page)) {
- set_freepointer(s, object, c->freelist);
+ set_freepointer(s, tail_obj, c->freelist);
if (unlikely(!this_cpu_cmpxchg_double(
s->cpu_slab->freelist, s->cpu_slab->tid,
c->freelist, tid,
- object, next_tid(tid)))) {
+ head, next_tid(tid)))) {
note_cmpxchg_failure("slab_free", s, tid);
goto redo;
}
stat(s, FREE_FASTPATH);
} else
- __slab_free(s, page, x, addr);
+ __slab_free(s, page, head, tail_obj, cnt, addr);
}
@@ -2739,59 +2809,116 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
s = cache_from_obj(s, x);
if (!s)
return;
- slab_free(s, virt_to_head_page(x), x, _RET_IP_);
+ slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
trace_kmem_cache_free(_RET_IP_, x);
}
EXPORT_SYMBOL(kmem_cache_free);
-/* Note that interrupts must be enabled when calling this function. */
-void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
-{
- struct kmem_cache_cpu *c;
+struct detached_freelist {
struct page *page;
- int i;
+ void *tail;
+ void *freelist;
+ int cnt;
+};
- local_irq_disable();
- c = this_cpu_ptr(s->cpu_slab);
+/*
+ * This function progressively scans the array of free objects (with
+ * a limited look-ahead) and extracts objects belonging to the same
+ * page. It builds a detached freelist directly within the given
+ * page/objects. This can happen without any need for
+ * synchronization, because the objects are owned by the running
+ * process. The freelist is built up as a singly linked list in the
+ * objects themselves. The idea is that this detached freelist can
+ * then be bulk transferred to the real freelist(s), requiring only a
+ * single synchronization primitive. Look-ahead in the array is
+ * limited for performance reasons.
+ */
+static int build_detached_freelist(struct kmem_cache *s, size_t size,
+ void **p, struct detached_freelist *df)
+{
+ size_t first_skipped_index = 0;
+ int lookahead = 3;
+ void *object;
- for (i = 0; i < size; i++) {
- void *object = p[i];
+ /* Always re-init detached_freelist */
+ df->page = NULL;
- BUG_ON(!object);
- /* kmem cache debug support */
- s = cache_from_obj(s, object);
- if (unlikely(!s))
- goto exit;
- slab_free_hook(s, object);
+ do {
+ object = p[--size];
+ } while (!object && size);
- page = virt_to_head_page(object);
+ if (!object)
+ return 0;
- if (c->page == page) {
- /* Fastpath: local CPU free */
- set_freepointer(s, object, c->freelist);
- c->freelist = object;
- } else {
- c->tid = next_tid(c->tid);
- local_irq_enable();
- /* Slowpath: overhead locked cmpxchg_double_slab */
- __slab_free(s, page, object, _RET_IP_);
- local_irq_disable();
- c = this_cpu_ptr(s->cpu_slab);
+ /* Start new detached freelist */
+ set_freepointer(s, object, NULL);
+ df->page = virt_to_head_page(object);
+ df->tail = object;
+ df->freelist = object;
+ p[size] = NULL; /* mark object processed */
+ df->cnt = 1;
+
+ while (size) {
+ object = p[--size];
+ if (!object)
+ continue; /* Skip processed objects */
+
+ /* df->page is always set at this point */
+ if (df->page == virt_to_head_page(object)) {
+ /* Opportunistically build the freelist */
+ set_freepointer(s, object, df->freelist);
+ df->freelist = object;
+ df->cnt++;
+ p[size] = NULL; /* mark object processed */
+
+ continue;
}
+
+ /* Limit look ahead search */
+ if (!--lookahead)
+ break;
+
+ if (!first_skipped_index)
+ first_skipped_index = size + 1;
}
-exit:
- c->tid = next_tid(c->tid);
- local_irq_enable();
+
+ return first_skipped_index;
+}
+
+
+/* Note that interrupts must be enabled when calling this function. */
+void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
+{
+ if (WARN_ON(!size))
+ return;
+
+ do {
+ struct detached_freelist df;
+ struct kmem_cache *s;
+
+ /* Support for memcg */
+ s = cache_from_obj(orig_s, p[size - 1]);
+
+ size = build_detached_freelist(s, size, p, &df);
+ if (unlikely(!df.page))
+ continue;
+
+ slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
+ } while (likely(size));
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
/* Note that interrupts must be enabled when calling this function. */
-bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
- void **p)
+int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+ void **p)
{
struct kmem_cache_cpu *c;
int i;
+ /* memcg and kmem_cache debug support */
+ s = slab_pre_alloc_hook(s, flags);
+ if (unlikely(!s))
+ return false;
/*
* Drain objects in the per cpu slab, while disabling local
* IRQs, which protects against PREEMPT and interrupts
@@ -2804,36 +2931,20 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void *object = c->freelist;
if (unlikely(!object)) {
- local_irq_enable();
/*
* Invoking slow path likely have side-effect
* of re-populating per CPU c->freelist
*/
- p[i] = __slab_alloc(s, flags, NUMA_NO_NODE,
+ p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
_RET_IP_, c);
- if (unlikely(!p[i])) {
- __kmem_cache_free_bulk(s, i, p);
- return false;
- }
- local_irq_disable();
+ if (unlikely(!p[i]))
+ goto error;
+
c = this_cpu_ptr(s->cpu_slab);
continue; /* goto for-loop */
}
-
- /* kmem_cache debug support */
- s = slab_pre_alloc_hook(s, flags);
- if (unlikely(!s)) {
- __kmem_cache_free_bulk(s, i, p);
- c->tid = next_tid(c->tid);
- local_irq_enable();
- return false;
- }
-
c->freelist = get_freepointer(s, object);
p[i] = object;
-
- /* kmem_cache debug support */
- slab_post_alloc_hook(s, flags, object);
}
c->tid = next_tid(c->tid);
local_irq_enable();
@@ -2846,7 +2957,14 @@ bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
memset(p[j], 0, s->object_size);
}
- return true;
+ /* memcg and kmem_cache debug support */
+ slab_post_alloc_hook(s, flags, size, p);
+ return i;
+error:
+ local_irq_enable();
+ slab_post_alloc_hook(s, flags, i, p);
+ __kmem_cache_free_bulk(s, i, p);
+ return 0;
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
@@ -3511,7 +3629,7 @@ void kfree(const void *x)
__free_kmem_pages(page, compound_order(page));
return;
}
- slab_free(page->slab_cache, page, object, _RET_IP_);
+ slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
}
EXPORT_SYMBOL(kfree);
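The centerpiece of the slub changes above is build_detached_freelist(): scan the array from the end, link all objects sharing a page into a list threaded through the objects themselves, mark them processed, and return an index from which the caller rescans for the rest. A compressed userspace model — the object's first word plays the freepointer, and a "page" is just the address shifted down:

#include <stdio.h>
#include <stdint.h>

#define FAKE_PAGE(obj)	((uintptr_t)(obj) >> 6)	/* toy 64-byte "pages" */

struct df { void *freelist, *tail; uintptr_t page; int cnt; };

static size_t build_df(void **p, size_t size, struct df *df)
{
	size_t first_skipped = 0;
	int lookahead = 3;
	void *object;

	do {			/* find the last unprocessed object */
		object = p[--size];
	} while (!object && size);
	if (!object)
		return 0;

	*(void **)object = NULL;	/* start the detached list */
	df->page = FAKE_PAGE(object);
	df->freelist = df->tail = object;
	df->cnt = 1;
	p[size] = NULL;			/* mark processed */

	while (size) {
		object = p[--size];
		if (!object)
			continue;
		if (FAKE_PAGE(object) == df->page) {
			*(void **)object = df->freelist;	/* link in */
			df->freelist = object;
			df->cnt++;
			p[size] = NULL;
			continue;
		}
		if (!--lookahead)	/* bounded search, as in the patch */
			break;
		if (!first_skipped)
			first_skipped = size + 1;
	}
	return first_skipped;	/* caller restarts the scan from here */
}

int main(void)
{
	static char buf[2][64] __attribute__((aligned(64)));
	void *p[4] = { buf[0], buf[0] + 8, buf[1], buf[0] + 16 };
	struct df df;
	size_t rest = build_df(p, 4, &df);

	printf("grouped %d objects, rescan size %zu\n", df.cnt, rest);
	return 0;
}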
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d04563480c94..8e3c9c5a3042 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1443,7 +1443,6 @@ struct vm_struct *remove_vm_area(const void *addr)
vmap_debug_free_range(va->va_start, va->va_end);
kasan_free_shadow(vm);
free_unmap_vmap_area(va);
- vm->size -= PAGE_SIZE;
return vm;
}
@@ -1468,8 +1467,8 @@ static void __vunmap(const void *addr, int deallocate_pages)
return;
}
- debug_check_no_locks_freed(addr, area->size);
- debug_check_no_obj_freed(addr, area->size);
+ debug_check_no_locks_freed(addr, get_vm_area_size(area));
+ debug_check_no_obj_freed(addr, get_vm_area_size(area));
if (deallocate_pages) {
int i;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 879a2be23325..c54fd2924f25 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -219,7 +219,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat,
* particular counter cannot be updated from interrupt context.
*/
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
- int delta)
+ long delta)
{
struct per_cpu_pageset __percpu *pcp = zone->pageset;
s8 __percpu *p = pcp->vm_stat_diff + item;
@@ -318,8 +318,8 @@ EXPORT_SYMBOL(__dec_zone_page_state);
* 1 Overstepping half of threshold
* -1 Overstepping minus half of threshold
*/
-static inline void mod_state(struct zone *zone,
- enum zone_stat_item item, int delta, int overstep_mode)
+static inline void mod_state(struct zone *zone, enum zone_stat_item item,
+ long delta, int overstep_mode)
{
struct per_cpu_pageset __percpu *pcp = zone->pageset;
s8 __percpu *p = pcp->vm_stat_diff + item;
@@ -357,7 +357,7 @@ static inline void mod_state(struct zone *zone,
}
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
- int delta)
+ long delta)
{
mod_state(zone, item, delta, 0);
}
@@ -384,7 +384,7 @@ EXPORT_SYMBOL(dec_zone_page_state);
* Use interrupt disable to serialize counter updates
*/
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
- int delta)
+ long delta)
{
unsigned long flags;
@@ -921,8 +921,8 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
#ifdef CONFIG_PROC_FS
static char * const migratetype_names[MIGRATE_TYPES] = {
"Unmovable",
- "Reclaimable",
"Movable",
+ "Reclaimable",
"HighAtomic",
#ifdef CONFIG_CMA
"CMA",
@@ -1379,6 +1379,7 @@ static const struct file_operations proc_vmstat_file_operations = {
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_SMP
+static struct workqueue_struct *vmstat_wq;
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;
static cpumask_var_t cpu_stat_off;
@@ -1391,7 +1392,7 @@ static void vmstat_update(struct work_struct *w)
* to occur in the future. Keep on running the
* update worker thread.
*/
- schedule_delayed_work_on(smp_processor_id(),
+ queue_delayed_work_on(smp_processor_id(), vmstat_wq,
this_cpu_ptr(&vmstat_work),
round_jiffies_relative(sysctl_stat_interval));
} else {
@@ -1460,7 +1461,7 @@ static void vmstat_shepherd(struct work_struct *w)
if (need_update(cpu) &&
cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
- schedule_delayed_work_on(cpu,
+ queue_delayed_work_on(cpu, vmstat_wq,
&per_cpu(vmstat_work, cpu), 0);
put_online_cpus();
@@ -1482,6 +1483,7 @@ static void __init start_shepherd_timer(void)
BUG();
cpumask_copy(cpu_stat_off, cpu_online_mask);
+ vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
schedule_delayed_work(&shepherd,
round_jiffies_relative(sysctl_stat_interval));
}
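The vmstat conversion above is the standard recipe for deferred work that must keep running under memory pressure: give it a dedicated WQ_MEM_RECLAIM workqueue (which carries a rescuer thread) rather than relying on the shared kworker pool. A kernel-style sketch of the same shape; the stats_* names are hypothetical, the workqueue APIs are real:

#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/errno.h>
#include <linux/init.h>

static struct workqueue_struct *stats_wq;
static struct delayed_work stats_work;

static void stats_update(struct work_struct *w)
{
	/* ... fold counters ..., then re-arm on the reclaim-safe queue */
	queue_delayed_work(stats_wq, &stats_work,
			   round_jiffies_relative(HZ));
}

static int __init stats_init(void)
{
	stats_wq = alloc_workqueue("stats", WQ_FREEZABLE | WQ_MEM_RECLAIM, 0);
	if (!stats_wq)
		return -ENOMEM;
	INIT_DELAYED_WORK(&stats_work, stats_update);
	queue_delayed_work(stats_wq, &stats_work, HZ);
	return 0;
}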
diff --git a/mm/zswap.c b/mm/zswap.c
index 025f8dc723de..bf14508afd64 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -541,6 +541,7 @@ static struct zswap_pool *zswap_pool_last_get(void)
return last;
}
+/* type and compressor must be null-terminated */
static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
{
struct zswap_pool *pool;
@@ -548,10 +549,9 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
assert_spin_locked(&zswap_pools_lock);
list_for_each_entry_rcu(pool, &zswap_pools, list) {
- if (strncmp(pool->tfm_name, compressor, sizeof(pool->tfm_name)))
+ if (strcmp(pool->tfm_name, compressor))
continue;
- if (strncmp(zpool_get_type(pool->zpool), type,
- sizeof(zswap_zpool_type)))
+ if (strcmp(zpool_get_type(pool->zpool), type))
continue;
/* if we can't get it, it's about to be destroyed */
if (!zswap_pool_get(pool))
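The zswap hunk above fixes a classic strncmp() misuse: bounding the comparison with sizeof() of an unrelated object can stop before the strings diverge and report a spurious match. With both names guaranteed NUL-terminated, plain strcmp() is correct. A userspace demonstration of the failure mode:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char wrong_bound[4];	/* stand-in for sizeof() of the wrong object */
	const char *a = "zbud-x", *b = "zbud-y";

	(void)wrong_bound;
	/* compares only "zbud" vs "zbud": spurious match */
	printf("strncmp: %d\n", strncmp(a, b, sizeof(wrong_bound)) == 0);
	/* full comparison: correctly different */
	printf("strcmp:  %d\n", strcmp(a, b) == 0);
	return 0;
}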
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 496b27588493..e2ed69850489 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -30,7 +30,9 @@ bool vlan_do_receive(struct sk_buff **skbp)
skb->pkt_type = PACKET_HOST;
}
- if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+ if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR) &&
+ !netif_is_macvlan_port(vlan_dev) &&
+ !netif_is_bridge_port(vlan_dev)) {
unsigned int offset = skb->data - skb_mac_header(skb);
/*
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index ae3a47f9d1d5..fbd0acf80b13 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -805,6 +805,9 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
struct sock *sk;
ax25_cb *ax25;
+ if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ return -EINVAL;
+
if (!net_eq(net, &init_net))
return -EAFNOSUPPORT;
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 83bc1aaf5800..a49c705fb86b 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -566,6 +566,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
int select;
batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key;
struct batadv_dat_candidate *res;
+ struct batadv_dat_entry dat;
if (!bat_priv->orig_hash)
return NULL;
@@ -575,7 +576,9 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
if (!res)
return NULL;
- ip_key = (batadv_dat_addr_t)batadv_hash_dat(&ip_dst,
+ dat.ip = ip_dst;
+ dat.vid = 0;
+ ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat,
BATADV_DAT_ADDR_MAX);
batadv_dbg(BATADV_DBG_DAT, bat_priv,
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 8d990b070a2e..3207667e69de 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -836,6 +836,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
u8 *orig_addr;
struct batadv_orig_node *orig_node = NULL;
int check, hdr_size = sizeof(*unicast_packet);
+ enum batadv_subtype subtype;
bool is4addr;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -863,10 +864,20 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
/* packet for me */
if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
if (is4addr) {
- batadv_dat_inc_counter(bat_priv,
- unicast_4addr_packet->subtype);
- orig_addr = unicast_4addr_packet->src;
- orig_node = batadv_orig_hash_find(bat_priv, orig_addr);
+ subtype = unicast_4addr_packet->subtype;
+ batadv_dat_inc_counter(bat_priv, subtype);
+
+ /* Only payload data should be considered for speedy
+ * join. For example, DAT also uses unicast 4addr
+ * types, but those packets should not be considered
+ * for speedy join, since the clients do not actually
+ * reside at the sending originator.
+ */
+ if (subtype == BATADV_P_DATA) {
+ orig_addr = unicast_4addr_packet->src;
+ orig_node = batadv_orig_hash_find(bat_priv,
+ orig_addr);
+ }
}
if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb,
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 4228b10c47ea..76f19ba62462 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -68,13 +68,15 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
unsigned short vid, const char *message,
bool roaming);
-/* returns 1 if they are the same mac addr */
+/* returns 1 if they are the same mac addr and vid */
static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
{
const void *data1 = container_of(node, struct batadv_tt_common_entry,
hash_entry);
+ const struct batadv_tt_common_entry *tt1 = data1;
+ const struct batadv_tt_common_entry *tt2 = data2;
- return batadv_compare_eth(data1, data2);
+ return (tt1->vid == tt2->vid) && batadv_compare_eth(data1, data2);
}
/**
@@ -1427,9 +1429,15 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
}
/* if the client was temporary added before receiving the first
- * OGM announcing it, we have to clear the TEMP flag
+ * OGM announcing it, we have to clear the TEMP flag. Also,
+ * remove the previous temporary orig node and re-add it
+ * if required. If the orig entry changed, the new one which
+ * is a non-temporary entry is preferred.
*/
- common->flags &= ~BATADV_TT_CLIENT_TEMP;
+ if (common->flags & BATADV_TT_CLIENT_TEMP) {
+ batadv_tt_global_del_orig_list(tt_global_entry);
+ common->flags &= ~BATADV_TT_CLIENT_TEMP;
+ }
/* the change can carry possible "attribute" flags like the
* TT_CLIENT_WIFI, therefore they have to be copied in the
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index a3bffd1ec2b4..70306cc9d814 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -271,11 +271,11 @@ static long bt_sock_data_wait(struct sock *sk, long timeo)
if (signal_pending(current) || !timeo)
break;
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
release_sock(sk);
timeo = schedule_timeout(timeo);
lock_sock(sk);
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
}
__set_current_state(TASK_RUNNING);
@@ -441,7 +441,7 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock,
if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
return mask;
}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index fe129663bd3f..f52bcbf2e58c 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -526,6 +526,9 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr,
if (!addr || addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
+ if (addr_len < sizeof(struct sockaddr_sco))
+ return -EINVAL;
+
lock_sock(sk);
if (sk->sk_state != BT_OPEN) {
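The sco_sock_bind() fix above is the usual sockaddr hardening: addr_len comes from userspace, so the kernel must confirm the buffer covers the protocol-specific layout before reading its fields. A userspace model of the check (sockaddr_demo is a made-up stand-in for sockaddr_sco):

#include <stdio.h>
#include <sys/socket.h>

struct sockaddr_demo {		/* stand-in for sockaddr_sco */
	sa_family_t family;
	unsigned char chan[6];
};

static int demo_bind(const struct sockaddr *addr, socklen_t addr_len)
{
	if (!addr || addr->sa_family != AF_UNSPEC)
		return -1;
	if (addr_len < sizeof(struct sockaddr_demo))
		return -1;	/* too short: reject before touching chan[] */
	return 0;
}

int main(void)
{
	struct sockaddr_demo sa = { .family = AF_UNSPEC };

	printf("%d\n", demo_bind((struct sockaddr *)&sa, sizeof(sa))); /* 0 */
	printf("%d\n", demo_bind((struct sockaddr *)&sa, 2));          /* -1 */
	return 0;
}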
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index c91353841e40..ffed8a1d4f27 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -3027,8 +3027,13 @@ static void smp_ready_cb(struct l2cap_chan *chan)
BT_DBG("chan %p", chan);
+ /* No need to call l2cap_chan_hold() here since we already own
+ * the reference taken in smp_new_conn_cb(). This is just the
+ * first time that we tie it to a specific pointer. The code in
+ * l2cap_core.c ensures that there's no risk this function won't
+ * get called if smp_new_conn_cb was previously called.
+ */
conn->smp = chan;
- l2cap_chan_hold(chan);
if (hcon->type == ACL_LINK && test_bit(HCI_CONN_ENCRYPT, &hcon->flags))
bredr_pairing(chan);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index f7e8dee64fc8..5f3f64553179 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -48,7 +48,7 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
p->state = state;
err = switchdev_port_attr_set(p->dev, &attr);
- if (err)
+ if (err && err != -EOPNOTSUPP)
br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
(unsigned int) p->port_no, p->dev->name);
}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index fa53d7a89f48..8a7ada8bb947 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -39,7 +39,7 @@ void br_init_port(struct net_bridge_port *p)
struct switchdev_attr attr = {
.id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
.flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER,
- .u.ageing_time = p->br->ageing_time,
+ .u.ageing_time = jiffies_to_clock_t(p->br->ageing_time),
};
int err;
@@ -50,7 +50,7 @@ void br_init_port(struct net_bridge_port *p)
p->config_pending = 0;
err = switchdev_port_attr_set(p->dev, &attr);
- if (err)
+ if (err && err != -EOPNOTSUPP)
netdev_err(p->dev, "failed to set HW ageing time\n");
}
@@ -142,7 +142,10 @@ static void br_stp_start(struct net_bridge *br)
char *envp[] = { NULL };
struct net_bridge_port *p;
- r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+ if (net_eq(dev_net(br->dev), &init_net))
+ r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+ else
+ r = -ENOENT;
spin_lock_bh(&br->lock);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index cc858919108e..aa209b1066c9 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -323,7 +323,7 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
!timeo)
break;
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
release_sock(sk);
timeo = schedule_timeout(timeo);
lock_sock(sk);
@@ -331,7 +331,7 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
if (sock_flag(sk, SOCK_DEAD))
break;
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
}
finish_wait(sk_sleep(sk), &wait);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 617088aee21d..d62af69ad844 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -785,7 +785,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
if (sock_writeable(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
return mask;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index ab9b8d0d115e..ae00b894e675 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2403,17 +2403,20 @@ static void skb_warn_bad_offload(const struct sk_buff *skb)
{
static const netdev_features_t null_features = 0;
struct net_device *dev = skb->dev;
- const char *driver = "";
+ const char *name = "";
if (!net_ratelimit())
return;
- if (dev && dev->dev.parent)
- driver = dev_driver_string(dev->dev.parent);
-
+ if (dev) {
+ if (dev->dev.parent)
+ name = dev_driver_string(dev->dev.parent);
+ else
+ name = netdev_name(dev);
+ }
WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
"gso_type=%d ip_summed=%d\n",
- driver, dev ? &dev->features : &null_features,
+ name, dev ? &dev->features : &null_features,
skb->sk ? &skb->sk->sk_route_caps : &null_features,
skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
skb_shinfo(skb)->gso_type, skb->ip_summed);
@@ -6426,11 +6429,16 @@ int __netdev_update_features(struct net_device *dev)
if (dev->netdev_ops->ndo_set_features)
err = dev->netdev_ops->ndo_set_features(dev, features);
+ else
+ err = 0;
if (unlikely(err < 0)) {
netdev_err(dev,
"set_features() failed (%d); wanted %pNF, left %pNF\n",
err, &features, &dev->features);
+ /* return non-0 since some features might have changed and
+ * it's better to fire a spurious notification than miss it
+ */
return -1;
}
diff --git a/net/core/dst.c b/net/core/dst.c
index e6dc77252fe9..a1656e3b8d72 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -301,12 +301,13 @@ void dst_release(struct dst_entry *dst)
{
if (dst) {
int newrefcnt;
+ unsigned short nocache = dst->flags & DST_NOCACHE;
newrefcnt = atomic_dec_return(&dst->__refcnt);
if (unlikely(newrefcnt < 0))
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt);
- if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE))
+ if (!newrefcnt && unlikely(nocache))
call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
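The dst_release() change above is subtle: once the refcount can reach zero, another CPU may free the entry, so any field needed after the decrement (here the DST_NOCACHE flag) must be read beforehand. A userspace model of the safe ordering:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define OBJ_NOCACHE 0x1

struct obj {
	atomic_int refcnt;
	unsigned short flags;
};

/* read the flag *before* the decrement: once the count can reach
 * zero, another thread may free the object under us */
static bool release_needs_destroy(struct obj *o)
{
	unsigned short nocache = o->flags & OBJ_NOCACHE;
	int newref = atomic_fetch_sub(&o->refcnt, 1) - 1;

	return newref == 0 && nocache;	/* no deref of o past the sub */
}

int main(void)
{
	struct obj o = { 1, OBJ_NOCACHE };

	printf("%d\n", release_needs_destroy(&o));	/* 1 */
	return 0;
}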
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1aa8437ed6c4..f18ae91b652e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -857,7 +857,7 @@ static void neigh_probe(struct neighbour *neigh)
struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
/* keep skb alive even if arp_queue overflows */
if (skb)
- skb = skb_copy(skb, GFP_ATOMIC);
+ skb = skb_clone(skb, GFP_ATOMIC);
write_unlock(&neigh->lock);
neigh->ops->solicit(neigh, skb);
atomic_inc(&neigh->probes);
@@ -2215,7 +2215,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
ndm->ndm_pad2 = 0;
ndm->ndm_flags = pn->flags | NTF_PROXY;
ndm->ndm_type = RTN_UNICAST;
- ndm->ndm_ifindex = pn->dev->ifindex;
+ ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
ndm->ndm_state = NUD_NONE;
if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
@@ -2333,7 +2333,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (h > s_h)
s_idx = 0;
for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
- if (dev_net(n->dev) != net)
+ if (pneigh_net(n) != net)
continue;
if (idx < s_idx)
goto next;
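On the skb_copy() to skb_clone() switch in neigh_probe() above: a clone is a new handle on the same refcounted data, while a copy duplicates the payload. Keeping the skb alive across the unlock needs only the former. A userspace model of the distinction (simplified: a real clone allocates its own handle object):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct pkt {
	int refs;
	char data[64];
};

static struct pkt *pkt_clone(struct pkt *p)	/* share the data */
{
	p->refs++;
	return p;
}

static struct pkt *pkt_copy(const struct pkt *p)	/* duplicate it */
{
	struct pkt *n = malloc(sizeof(*n));

	if (n) {
		memcpy(n->data, p->data, sizeof(n->data));
		n->refs = 1;
	}
	return n;
}

int main(void)
{
	struct pkt p = { 1, "probe" };
	struct pkt *keep = pkt_clone(&p);	/* cheap: just a reference */
	struct pkt *dup = pkt_copy(&p);		/* expensive: new buffer */

	printf("%d %s\n", keep->refs, dup ? dup->data : "-");
	free(dup);
	return 0;
}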
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 6441f47b1a8f..d9ee8d08a3a6 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -56,7 +56,7 @@ static void cgrp_css_free(struct cgroup_subsys_state *css)
kfree(css_cls_state(css));
}
-static int update_classid(const void *v, struct file *file, unsigned n)
+static int update_classid_sock(const void *v, struct file *file, unsigned n)
{
int err;
struct socket *sock = sock_from_file(file, &err);
@@ -67,18 +67,27 @@ static int update_classid(const void *v, struct file *file, unsigned n)
return 0;
}
-static void cgrp_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void update_classid(struct cgroup_subsys_state *css, void *v)
{
- struct cgroup_cls_state *cs = css_cls_state(css);
- void *v = (void *)(unsigned long)cs->classid;
+ struct css_task_iter it;
struct task_struct *p;
- cgroup_taskset_for_each(p, tset) {
+ css_task_iter_start(css, &it);
+ while ((p = css_task_iter_next(&it))) {
task_lock(p);
- iterate_fd(p->files, 0, update_classid, v);
+ iterate_fd(p->files, 0, update_classid_sock, v);
task_unlock(p);
}
+ css_task_iter_end(&it);
+}
+
+static void cgrp_attach(struct cgroup_taskset *tset)
+{
+ struct cgroup_subsys_state *css;
+
+ cgroup_taskset_first(tset, &css);
+ update_classid(css,
+ (void *)(unsigned long)css_cls_state(css)->classid);
}
static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
@@ -89,8 +98,11 @@ static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
u64 value)
{
- css_cls_state(css)->classid = (u32) value;
+ struct cgroup_cls_state *cs = css_cls_state(css);
+
+ cs->classid = (u32)value;
+ update_classid(css, (void *)(unsigned long)cs->classid);
return 0;
}
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index cbd0a199bf52..40fd09fe06ae 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -218,13 +218,14 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
return 0;
}
-static void net_prio_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static void net_prio_attach(struct cgroup_taskset *tset)
{
struct task_struct *p;
- void *v = (void *)(unsigned long)css->cgroup->id;
+ struct cgroup_subsys_state *css;
+
+ cgroup_taskset_for_each(p, css, tset) {
+ void *v = (void *)(unsigned long)css->cgroup->id;
- cgroup_taskset_for_each(p, tset) {
task_lock(p);
iterate_fd(p->files, 0, update_netprio, v);
task_unlock(p);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 504bd17b7456..34ba7a08876d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1045,15 +1045,156 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
return 0;
}
+static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ const struct rtnl_link_stats64 *stats;
+ struct rtnl_link_stats64 temp;
+ struct nlattr *attr;
+
+ stats = dev_get_stats(dev, &temp);
+
+ attr = nla_reserve(skb, IFLA_STATS,
+ sizeof(struct rtnl_link_stats));
+ if (!attr)
+ return -EMSGSIZE;
+
+ copy_rtnl_link_stats(nla_data(attr), stats);
+
+ attr = nla_reserve(skb, IFLA_STATS64,
+ sizeof(struct rtnl_link_stats64));
+ if (!attr)
+ return -EMSGSIZE;
+
+ copy_rtnl_link_stats64(nla_data(attr), stats);
+
+ return 0;
+}
+
+static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
+ struct net_device *dev,
+ int vfs_num,
+ struct nlattr *vfinfo)
+{
+ struct ifla_vf_rss_query_en vf_rss_query_en;
+ struct ifla_vf_link_state vf_linkstate;
+ struct ifla_vf_spoofchk vf_spoofchk;
+ struct ifla_vf_tx_rate vf_tx_rate;
+ struct ifla_vf_stats vf_stats;
+ struct ifla_vf_trust vf_trust;
+ struct ifla_vf_vlan vf_vlan;
+ struct ifla_vf_rate vf_rate;
+ struct nlattr *vf, *vfstats;
+ struct ifla_vf_mac vf_mac;
+ struct ifla_vf_info ivi;
+
+ /* Not all SR-IOV capable drivers support the
+ * spoofcheck and "RSS query enable" query. Preset to
+ * -1 so the user space tool can detect that the driver
+ * didn't report anything.
+ */
+ ivi.spoofchk = -1;
+ ivi.rss_query_en = -1;
+ ivi.trusted = -1;
+ memset(ivi.mac, 0, sizeof(ivi.mac));
+ /* The default value for VF link state is "auto"
+ * IFLA_VF_LINK_STATE_AUTO which equals zero
+ */
+ ivi.linkstate = 0;
+ if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
+ return 0;
+
+ vf_mac.vf =
+ vf_vlan.vf =
+ vf_rate.vf =
+ vf_tx_rate.vf =
+ vf_spoofchk.vf =
+ vf_linkstate.vf =
+ vf_rss_query_en.vf =
+ vf_trust.vf = ivi.vf;
+
+ memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+ vf_vlan.vlan = ivi.vlan;
+ vf_vlan.qos = ivi.qos;
+ vf_tx_rate.rate = ivi.max_tx_rate;
+ vf_rate.min_tx_rate = ivi.min_tx_rate;
+ vf_rate.max_tx_rate = ivi.max_tx_rate;
+ vf_spoofchk.setting = ivi.spoofchk;
+ vf_linkstate.link_state = ivi.linkstate;
+ vf_rss_query_en.setting = ivi.rss_query_en;
+ vf_trust.setting = ivi.trusted;
+ vf = nla_nest_start(skb, IFLA_VF_INFO);
+ if (!vf) {
+ nla_nest_cancel(skb, vfinfo);
+ return -EMSGSIZE;
+ }
+ if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
+ nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
+ nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
+ &vf_rate) ||
+ nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
+ &vf_tx_rate) ||
+ nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
+ &vf_spoofchk) ||
+ nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
+ &vf_linkstate) ||
+ nla_put(skb, IFLA_VF_RSS_QUERY_EN,
+ sizeof(vf_rss_query_en),
+ &vf_rss_query_en) ||
+ nla_put(skb, IFLA_VF_TRUST,
+ sizeof(vf_trust), &vf_trust))
+ return -EMSGSIZE;
+ memset(&vf_stats, 0, sizeof(vf_stats));
+ if (dev->netdev_ops->ndo_get_vf_stats)
+ dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
+ &vf_stats);
+ vfstats = nla_nest_start(skb, IFLA_VF_STATS);
+ if (!vfstats) {
+ nla_nest_cancel(skb, vf);
+ nla_nest_cancel(skb, vfinfo);
+ return -EMSGSIZE;
+ }
+ if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
+ vf_stats.rx_packets) ||
+ nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
+ vf_stats.tx_packets) ||
+ nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
+ vf_stats.rx_bytes) ||
+ nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
+ vf_stats.tx_bytes) ||
+ nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
+ vf_stats.broadcast) ||
+ nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
+ vf_stats.multicast))
+ return -EMSGSIZE;
+ nla_nest_end(skb, vfstats);
+ nla_nest_end(skb, vf);
+ return 0;
+}
+
+static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
+{
+ struct rtnl_link_ifmap map = {
+ .mem_start = dev->mem_start,
+ .mem_end = dev->mem_end,
+ .base_addr = dev->base_addr,
+ .irq = dev->irq,
+ .dma = dev->dma,
+ .port = dev->if_port,
+ };
+ if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask)
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
- struct rtnl_link_stats64 temp;
- const struct rtnl_link_stats64 *stats;
- struct nlattr *attr, *af_spec;
+ struct nlattr *af_spec;
struct rtnl_af_ops *af_ops;
struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
@@ -1096,18 +1237,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
goto nla_put_failure;
- if (1) {
- struct rtnl_link_ifmap map = {
- .mem_start = dev->mem_start,
- .mem_end = dev->mem_end,
- .base_addr = dev->base_addr,
- .irq = dev->irq,
- .dma = dev->dma,
- .port = dev->if_port,
- };
- if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
- goto nla_put_failure;
- }
+ if (rtnl_fill_link_ifmap(skb, dev))
+ goto nla_put_failure;
if (dev->addr_len) {
if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
@@ -1124,128 +1255,27 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (rtnl_phys_switch_id_fill(skb, dev))
goto nla_put_failure;
- attr = nla_reserve(skb, IFLA_STATS,
- sizeof(struct rtnl_link_stats));
- if (attr == NULL)
- goto nla_put_failure;
-
- stats = dev_get_stats(dev, &temp);
- copy_rtnl_link_stats(nla_data(attr), stats);
-
- attr = nla_reserve(skb, IFLA_STATS64,
- sizeof(struct rtnl_link_stats64));
- if (attr == NULL)
+ if (rtnl_fill_stats(skb, dev))
goto nla_put_failure;
- copy_rtnl_link_stats64(nla_data(attr), stats);
if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
goto nla_put_failure;
- if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent
- && (ext_filter_mask & RTEXT_FILTER_VF)) {
+ if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
+ ext_filter_mask & RTEXT_FILTER_VF) {
int i;
-
- struct nlattr *vfinfo, *vf, *vfstats;
+ struct nlattr *vfinfo;
int num_vfs = dev_num_vf(dev->dev.parent);
vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
if (!vfinfo)
goto nla_put_failure;
for (i = 0; i < num_vfs; i++) {
- struct ifla_vf_info ivi;
- struct ifla_vf_mac vf_mac;
- struct ifla_vf_vlan vf_vlan;
- struct ifla_vf_rate vf_rate;
- struct ifla_vf_tx_rate vf_tx_rate;
- struct ifla_vf_spoofchk vf_spoofchk;
- struct ifla_vf_link_state vf_linkstate;
- struct ifla_vf_rss_query_en vf_rss_query_en;
- struct ifla_vf_stats vf_stats;
- struct ifla_vf_trust vf_trust;
-
- /*
- * Not all SR-IOV capable drivers support the
- * spoofcheck and "RSS query enable" query. Preset to
- * -1 so the user space tool can detect that the driver
- * didn't report anything.
- */
- ivi.spoofchk = -1;
- ivi.rss_query_en = -1;
- ivi.trusted = -1;
- memset(ivi.mac, 0, sizeof(ivi.mac));
- /* The default value for VF link state is "auto"
- * IFLA_VF_LINK_STATE_AUTO which equals zero
- */
- ivi.linkstate = 0;
- if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
- break;
- vf_mac.vf =
- vf_vlan.vf =
- vf_rate.vf =
- vf_tx_rate.vf =
- vf_spoofchk.vf =
- vf_linkstate.vf =
- vf_rss_query_en.vf =
- vf_trust.vf = ivi.vf;
-
- memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
- vf_vlan.vlan = ivi.vlan;
- vf_vlan.qos = ivi.qos;
- vf_tx_rate.rate = ivi.max_tx_rate;
- vf_rate.min_tx_rate = ivi.min_tx_rate;
- vf_rate.max_tx_rate = ivi.max_tx_rate;
- vf_spoofchk.setting = ivi.spoofchk;
- vf_linkstate.link_state = ivi.linkstate;
- vf_rss_query_en.setting = ivi.rss_query_en;
- vf_trust.setting = ivi.trusted;
- vf = nla_nest_start(skb, IFLA_VF_INFO);
- if (!vf) {
- nla_nest_cancel(skb, vfinfo);
- goto nla_put_failure;
- }
- if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
- nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
- nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
- &vf_rate) ||
- nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
- &vf_tx_rate) ||
- nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
- &vf_spoofchk) ||
- nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
- &vf_linkstate) ||
- nla_put(skb, IFLA_VF_RSS_QUERY_EN,
- sizeof(vf_rss_query_en),
- &vf_rss_query_en) ||
- nla_put(skb, IFLA_VF_TRUST,
- sizeof(vf_trust), &vf_trust))
+ if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
goto nla_put_failure;
- memset(&vf_stats, 0, sizeof(vf_stats));
- if (dev->netdev_ops->ndo_get_vf_stats)
- dev->netdev_ops->ndo_get_vf_stats(dev, i,
- &vf_stats);
- vfstats = nla_nest_start(skb, IFLA_VF_STATS);
- if (!vfstats) {
- nla_nest_cancel(skb, vf);
- nla_nest_cancel(skb, vfinfo);
- goto nla_put_failure;
- }
- if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
- vf_stats.rx_packets) ||
- nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
- vf_stats.tx_packets) ||
- nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
- vf_stats.rx_bytes) ||
- nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
- vf_stats.tx_bytes) ||
- nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
- vf_stats.broadcast) ||
- nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
- vf_stats.multicast))
- goto nla_put_failure;
- nla_nest_end(skb, vfstats);
- nla_nest_end(skb, vf);
}
+
nla_nest_end(skb, vfinfo);
}
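
The rtnetlink.c change is a stack-size refactor rather than a behaviour change: the inline IFLA_STATS/IFLA_STATS64 code, the if (1) { ... } ifmap block, and the large per-VF locals move out of rtnl_fill_ifinfo() into rtnl_fill_stats(), rtnl_fill_link_ifmap() and rtnl_fill_vfinfo(), the first two tagged noinline_for_stack so their locals no longer sit on rtnl_fill_ifinfo()'s frame. The helpers keep the standard netlink nesting discipline, condensed here as an illustrative fragment (attribute names as in the patch):

    struct nlattr *nest = nla_nest_start(skb, IFLA_VF_INFO);
    if (!nest)
        return -EMSGSIZE;              /* message buffer exhausted */
    if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac)) {
        nla_nest_cancel(skb, nest);    /* trim the half-built nest */
        return -EMSGSIZE;
    }
    nla_nest_end(skb, nest);           /* commit: fixes up the nest length */

A nest that fails part-way must be cancelled, never ended, or the receiver would parse a truncated attribute; note how the nla_nest_start() failure paths in rtnl_fill_vfinfo() also cancel the outer vfinfo nest before returning.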
diff --git a/net/core/scm.c b/net/core/scm.c
index 3b6899b7d810..8a1741b14302 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -305,6 +305,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
err = put_user(cmlen, &cm->cmsg_len);
if (!err) {
cmlen = CMSG_SPACE(i*sizeof(int));
+ if (msg->msg_controllen < cmlen)
+ cmlen = msg->msg_controllen;
msg->msg_control += cmlen;
msg->msg_controllen -= cmlen;
}
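
The scm.c clamp closes an overflow in scm_detach_fds(): cmlen is recomputed as CMSG_SPACE() of the full fd array, and when the caller supplied a smaller msg_controllen the subtraction wrapped, leaving msg_control pointing past the user buffer. The arithmetic in isolation, as a runnable userspace sketch (values chosen only for illustration):

    #include <stdio.h>
    #include <sys/socket.h>

    int main(void)
    {
        size_t controllen = 24;                      /* what the caller gave us */
        size_t cmlen = CMSG_SPACE(4 * sizeof(int));  /* what 4 fds would need */

        if (controllen < cmlen)  /* the added clamp */
            cmlen = controllen;
        controllen -= cmlen;     /* can no longer underflow */
        printf("advanced %zu bytes, %zu remaining\n", cmlen, controllen);
        return 0;
    }
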
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index aa41e6dd6429..b2df375ec9c2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3643,7 +3643,8 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
serr->ee.ee_info = tstype;
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
- if (sk->sk_protocol == IPPROTO_TCP)
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_type == SOCK_STREAM)
serr->ee.ee_data -= sk->sk_tskey;
}
@@ -4268,7 +4269,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
return NULL;
}
- memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+ memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN,
+ 2 * ETH_ALEN);
skb->mac_header += VLAN_HLEN;
return skb;
}
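
Two skbuff.c fixes: __skb_complete_tx_timestamp() only rewrites ee_data for genuine TCP stream sockets (a raw socket opened with protocol IPPROTO_TCP would otherwise pass the test, and its tskey means something different), and skb_reorder_vlan_header() computes the memmove source from skb->mac_len instead of assuming the header lies exactly VLAN_ETH_HLEN below skb->data, which went wrong for stacked VLANs. The move itself slides the two MAC addresses up over the tag being stripped; the layout arithmetic as a runnable userspace sketch (placeholder bytes, 802.1Q sizes):

    #include <stdio.h>
    #include <string.h>

    #define ETH_ALEN  6   /* one MAC address */
    #define VLAN_HLEN 4   /* one 802.1Q tag */

    int main(void)
    {
        /* 6B dst MAC, 6B src MAC, 4B VLAN tag, then the inner ethertype */
        char frame[] = "DDDDDDSSSSSSvvvvTY";

        /* strip the tag: slide the 12 address bytes up by VLAN_HLEN;
         * memmove because source and destination overlap */
        memmove(frame + VLAN_HLEN, frame, 2 * ETH_ALEN);
        printf("%s\n", frame + VLAN_HLEN);  /* DDDDDDSSSSSSTY */
        return 0;
    }
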
diff --git a/net/core/sock.c b/net/core/sock.c
index 1e4dd54bfb5a..0d91f7dca751 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -433,8 +433,6 @@ static bool sock_needs_netstamp(const struct sock *sk)
}
}
-#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
-
static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
if (sk->sk_flags & flags) {
@@ -874,7 +872,8 @@ set_rcvbuf:
if (val & SOF_TIMESTAMPING_OPT_ID &&
!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
- if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_type == SOCK_STREAM) {
if (sk->sk_state != TCP_ESTABLISHED) {
ret = -EINVAL;
break;
@@ -1530,7 +1529,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
skb_queue_head_init(&newsk->sk_receive_queue);
skb_queue_head_init(&newsk->sk_write_queue);
- spin_lock_init(&newsk->sk_dst_lock);
rwlock_init(&newsk->sk_callback_lock);
lockdep_set_class_and_name(&newsk->sk_callback_lock,
af_callback_keys + newsk->sk_family,
@@ -1553,7 +1551,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
*/
is_charged = sk_filter_charge(newsk, filter);
- if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk))) {
+ if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
/* It is still raw copy of parent, so invalidate
* destructor and make plain sk_free() */
newsk->sk_destruct = NULL;
@@ -1607,7 +1605,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
u32 max_segs = 1;
- __sk_dst_set(sk, dst);
+ sk_dst_set(sk, dst);
sk->sk_route_caps = dst->dev->features;
if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
@@ -1815,7 +1813,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
DEFINE_WAIT(wait);
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
for (;;) {
if (!timeo)
break;
@@ -1861,7 +1859,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
break;
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
err = -EAGAIN;
if (!timeo)
@@ -2048,9 +2046,9 @@ int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
DEFINE_WAIT(wait);
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
finish_wait(sk_sleep(sk), &wait);
return rc;
}
@@ -2388,7 +2386,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
} else
sk->sk_wq = NULL;
- spin_lock_init(&sk->sk_dst_lock);
rwlock_init(&sk->sk_callback_lock);
lockdep_set_class_and_name(&sk->sk_callback_lock,
af_callback_keys + sk->sk_family,
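
Several related conversions land in sock.c. The SOCK_ASYNC_NOSPACE and SOCK_ASYNC_WAITDATA bits become SOCKWQ_ASYNC_NOSPACE/SOCKWQ_ASYNC_WAITDATA and move off sk->sk_socket->flags onto the socket wait-queue structure, manipulated through sk_set_bit()/sk_clear_bit(); this is part of a series untangling these paths from sk->sk_socket, which can be cleared concurrently by sock_orphan(). The sk_dst_lock initialisations disappear because sk_dst_set() publishes the route with a lockless pointer exchange, and sk_setup_caps() is switched to it accordingly. The mechanical shape of the flag conversion:

    /* before: dereferences sk->sk_socket, racy against orphaning */
    set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

    /* after: helper resolves the wait queue from the sock itself */
    sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
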
diff --git a/net/core/stream.c b/net/core/stream.c
index d70f77a0c889..b96f7a79e544 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -39,7 +39,7 @@ void sk_stream_write_space(struct sock *sk)
wake_up_interruptible_poll(&wq->wait, POLLOUT |
POLLWRNORM | POLLWRBAND);
if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
- sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
+ sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
rcu_read_unlock();
}
}
@@ -126,7 +126,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2;
while (1) {
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
@@ -139,7 +139,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
}
if (signal_pending(current))
goto do_interrupted;
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
if (sk_stream_memory_free(sk) && !vm_wait)
break;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index db5fc2440a23..9c6d0508e63a 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -202,7 +202,9 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
security_req_classify_flow(req, flowi6_to_flowi(&fl6));
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -219,7 +221,10 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
&ireq->ir_v6_loc_addr,
&ireq->ir_v6_rmt_addr);
fl6.daddr = ireq->ir_v6_rmt_addr;
- err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ rcu_read_lock();
+ err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ np->tclass);
+ rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -387,6 +392,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *newnp;
const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
struct inet_sock *newinet;
struct dccp6_sock *newdp6;
struct sock *newsk;
@@ -453,7 +459,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
* comment in that function for the gory details. -acme
*/
- __ip6_dst_store(newsk, dst, NULL, NULL);
+ ip6_dst_store(newsk, dst, NULL, NULL);
newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
NETIF_F_TSO);
newdp6 = (struct dccp6_sock *)newsk;
@@ -488,13 +494,15 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
* Yes, keeping reference count would be much more clever, but we make
* one more thing there: reattach optmem to newsk.
*/
- if (np->opt != NULL)
- newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ }
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newnp->opt != NULL)
- inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
- newnp->opt->opt_flen);
+ if (opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+ opt->opt_flen;
dccp_sync_mss(newsk, dst_mtu(dst));
@@ -757,6 +765,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct ipv6_pinfo *np = inet6_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
+ struct ipv6_txoptions *opt;
struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
@@ -856,7 +865,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.fl6_sport = inet->inet_sport;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ final_p = fl6_update_dst(&fl6, opt, &final);
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -873,12 +883,11 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
np->saddr = *saddr;
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
- __ip6_dst_store(sk, dst, NULL, NULL);
+ ip6_dst_store(sk, dst, NULL, NULL);
icsk->icsk_ext_hdr_len = 0;
- if (np->opt != NULL)
- icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
- np->opt->opt_nflen);
+ if (opt)
+ icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
inet->inet_dport = usin->sin6_port;
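
These dccp/ipv6.c hunks are dccp's share of the series that puts inet6_sk(sk)->opt under RCU: lockless readers snapshot the options inside an RCU read-side section, while dccp_v6_connect(), which runs with the socket lock held, uses rcu_dereference_protected() with sock_owned_by_user(sk) as its lockdep justification. The __ip6_dst_store() call sites become ip6_dst_store(), matching the retirement of sk_dst_lock visible in the sock.c hunks above. The two access shapes, condensed (fields as in the patch, not a standalone listing):

    /* reader: the pointer is valid only inside the critical section */
    rcu_read_lock();
    final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
    rcu_read_unlock();

    /* owner: socket lock held, no RCU read section required */
    opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));

Freeing correspondingly moves to the refcounted txopt_put() seen in the ipv6_sockglue.c hunks further down, so a concurrent reader never sees the options structure disappear under it.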
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b5cf13a28009..41e65804ddf5 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -339,8 +339,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
if (sk_stream_is_writeable(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else { /* send SIGIO later */
- set_bit(SOCK_ASYNC_NOSPACE,
- &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
/* Race breaker. If space is freed after
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 675cf94e04f8..13d6b1a6e0fc 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -678,6 +678,9 @@ static int dn_create(struct net *net, struct socket *sock, int protocol,
{
struct sock *sk;
+ if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ return -EINVAL;
+
if (!net_eq(net, &init_net))
return -EAFNOSUPPORT;
@@ -1747,9 +1750,9 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
}
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target));
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
finish_wait(sk_sleep(sk), &wait);
}
@@ -2004,10 +2007,10 @@ static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
}
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
sk_wait_event(sk, &timeo,
!dn_queue_too_long(scp, queue, flags));
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
finish_wait(sk_sleep(sk), &wait);
continue;
}
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index 4677b6fa6dda..ecc28cff08ab 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -67,7 +67,7 @@
* Returns the size of the result on success, -ve error code otherwise.
*/
int dns_query(const char *type, const char *name, size_t namelen,
- const char *options, char **_result, time_t *_expiry)
+ const char *options, char **_result, time64_t *_expiry)
{
struct key *rkey;
const struct user_key_payload *upayload;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 35a9788bb3ae..c7d1adca30d8 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -312,7 +312,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, u8 type)
return;
out:
- WARN_ON_ONCE("HSR: Could not send supervision frame\n");
+ WARN_ONCE(1, "HSR: Could not send supervision frame\n");
kfree_skb(skb);
}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 11c4ca13ec3b..5c5db6636704 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -257,6 +257,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
int try_loading_module = 0;
int err;
+ if (protocol < 0 || protocol >= IPPROTO_MAX)
+ return -EINVAL;
+
sock->state = SS_UNCONNECTED;
/* Look for the requested type/protocol pair. */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index cc8f3e506cde..473447593060 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1155,6 +1155,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changeupper_info *info;
struct in_device *in_dev;
struct net *net = dev_net(dev);
unsigned int flags;
@@ -1193,6 +1194,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
case NETDEV_CHANGEMTU:
rt_cache_flush(net);
break;
+ case NETDEV_CHANGEUPPER:
+ info = ptr;
+ /* flush all routes if dev is linked to or unlinked from
+ * an L3 master device (e.g., VRF)
+ */
+ if (info->upper_dev && netif_is_l3_master(info->upper_dev))
+ fib_disable_ip(dev, NETDEV_DOWN, true);
+ break;
}
return NOTIFY_DONE;
}
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index e0fcbbbcfe54..bd903fe0f750 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -24,6 +24,7 @@ struct fou {
u16 type;
struct udp_offload udp_offloads;
struct list_head list;
+ struct rcu_head rcu;
};
#define FOU_F_REMCSUM_NOPARTIAL BIT(0)
@@ -417,7 +418,7 @@ static void fou_release(struct fou *fou)
list_del(&fou->list);
udp_tunnel_sock_release(sock);
- kfree(fou);
+ kfree_rcu(fou, rcu);
}
static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
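
In fou.c, the first hunk embeds a struct rcu_head so that fou_release() can replace kfree() with kfree_rcu(), keeping the object alive for any path that obtained it under rcu_read_lock(). The pattern in miniature, as an illustrative sketch with hypothetical names:

    struct foo {
        struct list_head list;
        struct rcu_head rcu;      /* storage kfree_rcu() needs */
    };

    static void foo_release(struct foo *f)
    {
        list_del(&f->list);       /* unpublish under the update-side lock */
        kfree_rcu(f, rcu);        /* actual kfree() waits out a grace period */
    }

kfree_rcu(ptr, field) is the lighter-weight alternative to an explicit call_rcu() callback when that callback would only have called kfree().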
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6baf36e11808..05e4cba14162 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2126,7 +2126,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
ASSERT_RTNL();
in_dev = ip_mc_find_dev(net, imr);
- if (!in_dev) {
+ if (!imr->imr_ifindex && !imr->imr_address.s_addr && !in_dev) {
ret = -ENODEV;
goto out;
}
@@ -2147,7 +2147,8 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
*imlp = iml->next_rcu;
- ip_mc_dec_group(in_dev, group);
+ if (in_dev)
+ ip_mc_dec_group(in_dev, group);
/* decrease mem now to avoid the memleak warning */
atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1feb15f23de8..46b9c887bede 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -563,7 +563,7 @@ static void reqsk_timer_handler(unsigned long data)
int max_retries, thresh;
u8 defer_accept;
- if (sk_listener->sk_state != TCP_LISTEN)
+ if (sk_state_load(sk_listener) != TCP_LISTEN)
goto drop;
max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
@@ -749,7 +749,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
* It is OK, because this socket enters the hash table only
* after validation is complete.
*/
- sk->sk_state = TCP_LISTEN;
+ sk_state_store(sk, TCP_LISTEN);
if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
inet->inet_sport = htons(inet->inet_num);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index f34c31defafe..a09fb0dec725 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -253,9 +253,6 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
p.i_key = p.o_key = 0;
p.i_flags = p.o_flags = 0;
- if (p.iph.ttl)
- p.iph.frag_off |= htons(IP_DF);
-
err = ip_tunnel_ioctl(dev, &p, cmd);
if (err)
return err;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 92dd4b74d513..c3a38353f5dc 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -134,7 +134,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
int cmd);
-static void mroute_clean_tables(struct mr_table *mrt);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -350,7 +350,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
static void ipmr_free_table(struct mr_table *mrt)
{
del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, true);
kfree(mrt);
}
@@ -441,10 +441,6 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
return dev;
failure:
- /* allow the register to be completed before unregistering. */
- rtnl_unlock();
- rtnl_lock();
-
unregister_netdevice(dev);
return NULL;
}
@@ -540,10 +536,6 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
return dev;
failure:
- /* allow the register to be completed before unregistering. */
- rtnl_unlock();
- rtnl_lock();
-
unregister_netdevice(dev);
return NULL;
}
@@ -1208,7 +1200,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr_table *mrt)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
int i;
LIST_HEAD(list);
@@ -1217,8 +1209,9 @@ static void mroute_clean_tables(struct mr_table *mrt)
/* Shut down all active vif entries */
for (i = 0; i < mrt->maxvif; i++) {
- if (!(mrt->vif_table[i].flags & VIFF_STATIC))
- vif_delete(mrt, i, 0, &list);
+ if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
+ continue;
+ vif_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
@@ -1226,7 +1219,7 @@ static void mroute_clean_tables(struct mr_table *mrt)
for (i = 0; i < MFC_LINES; i++) {
list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
- if (c->mfc_flags & MFC_STATIC)
+ if (!all && (c->mfc_flags & MFC_STATIC))
continue;
list_del_rcu(&c->list);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
@@ -1261,7 +1254,7 @@ static void mrtsock_destruct(struct sock *sk)
NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all);
RCU_INIT_POINTER(mrt->mroute_sk, NULL);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, false);
}
}
rtnl_unlock();
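
mroute_clean_tables() grows a bool all argument: mrtsock_destruct(), run when the multicast-routing socket goes away, passes false and keeps VIFF_STATIC vifs and MFC_STATIC cache entries exactly as before, while ipmr_free_table() passes true so table destruction also reaps the static entries it previously leaked. The same change is mirrored for IPv6 in the ip6mr.c hunks below, and the deleted rtnl_unlock()/rtnl_lock() dance in the failure paths drops a relock cycle that, per the removed comment, is apparently no longer needed before unregister_netdevice(). The filtering idiom, reduced to a runnable userspace sketch:

    #include <stdbool.h>
    #include <stdio.h>

    struct entry { const char *name; bool is_static; };

    static void clean_table(const struct entry *tbl, int n, bool all)
    {
        for (int i = 0; i < n; i++) {
            if (!all && tbl[i].is_static)
                continue;               /* keep user-pinned entries */
            printf("deleting %s\n", tbl[i].name);
        }
    }

    int main(void)
    {
        const struct entry tbl[] = { {"dynamic", false}, {"static", true} };
        clean_table(tbl, 2, false);     /* socket close: static survives */
        clean_table(tbl, 2, true);      /* table free: everything goes */
        return 0;
    }
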
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index a35584176535..c187c60e3e0c 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -60,6 +60,7 @@ config NFT_REJECT_IPV4
config NFT_DUP_IPV4
tristate "IPv4 nf_tables packet duplication support"
+ depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV4
help
This module enables IPv4 packet duplication support for nf_tables.
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 657d2307f031..b3ca21b2ba9b 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -45,7 +45,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
struct net *net = nf_ct_net(ct);
const struct nf_conn *master = ct->master;
struct nf_conntrack_expect *other_exp;
- struct nf_conntrack_tuple t;
+ struct nf_conntrack_tuple t = {};
const struct nf_ct_pptp_master *ct_pptp_info;
const struct nf_nat_pptp *nat_pptp_info;
struct nf_nat_range range;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 8c0d0bdc2a7c..bc35f1842512 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -406,10 +406,12 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
ip_select_ident(net, skb, NULL);
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+ skb->transport_header += iphlen;
+ if (iph->protocol == IPPROTO_ICMP &&
+ length >= iphlen + sizeof(struct icmphdr))
+ icmp_out_count(net, ((struct icmphdr *)
+ skb_transport_header(skb))->type);
}
- if (iph->protocol == IPPROTO_ICMP)
- icmp_out_count(net, ((struct icmphdr *)
- skb_transport_header(skb))->type);
err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, rt->dst.dev,
@@ -599,8 +601,11 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
(inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0);
- if (!saddr && ipc.oif)
- l3mdev_get_saddr(net, ipc.oif, &fl4);
+ if (!saddr && ipc.oif) {
+ err = l3mdev_get_saddr(net, ipc.oif, &fl4);
+ if (err < 0)
+ goto done;
+ }
if (!inet->hdrincl) {
rfv.msg = msg;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0cfa7c0c1e80..c82cca18c90f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -451,11 +451,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
unsigned int mask;
struct sock *sk = sock->sk;
const struct tcp_sock *tp = tcp_sk(sk);
+ int state;
sock_rps_record_flow(sk);
sock_poll_wait(file, sk_sleep(sk), wait);
- if (sk->sk_state == TCP_LISTEN)
+
+ state = sk_state_load(sk);
+ if (state == TCP_LISTEN)
return inet_csk_listen_poll(sk);
/* Socket is not locked. We are protected from async events
@@ -492,14 +495,14 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
* NOTE. Check for TCP_CLOSE is added. The goal is to prevent
* blocking on fresh not-connected or disconnected socket. --ANK
*/
- if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE)
+ if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
mask |= POLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLIN | POLLRDNORM | POLLRDHUP;
/* Connected or passive Fast Open socket? */
- if (sk->sk_state != TCP_SYN_SENT &&
- (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk)) {
+ if (state != TCP_SYN_SENT &&
+ (state != TCP_SYN_RECV || tp->fastopen_rsk)) {
int target = sock_rcvlowat(sk, 0, INT_MAX);
if (tp->urg_seq == tp->copied_seq &&
@@ -507,9 +510,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
tp->urg_data)
target++;
- /* Potential race condition. If read of tp below will
- * escape above sk->sk_state, we can be illegally awaken
- * in SYN_* states. */
if (tp->rcv_nxt - tp->copied_seq >= target)
mask |= POLLIN | POLLRDNORM;
@@ -517,8 +517,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (sk_stream_is_writeable(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else { /* send SIGIO later */
- set_bit(SOCK_ASYNC_NOSPACE,
- &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
/* Race breaker. If space is freed after
@@ -906,7 +905,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
goto out_err;
}
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
mss_now = tcp_send_mss(sk, &size_goal, flags);
copied = 0;
@@ -1134,7 +1133,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
}
/* This should be in poll */
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
mss_now = tcp_send_mss(sk, &size_goal, flags);
@@ -1934,7 +1933,7 @@ void tcp_set_state(struct sock *sk, int state)
/* Change state AFTER socket is unhashed to avoid closed
* socket sitting in hash tables.
*/
- sk->sk_state = state;
+ sk_state_store(sk, state);
#ifdef STATE_TRACE
SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
@@ -2644,7 +2643,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
if (sk->sk_type != SOCK_STREAM)
return;
- info->tcpi_state = sk->sk_state;
+ info->tcpi_state = sk_state_load(sk);
+
info->tcpi_ca_state = icsk->icsk_ca_state;
info->tcpi_retransmits = icsk->icsk_retransmits;
info->tcpi_probes = icsk->icsk_probes_out;
@@ -2672,7 +2672,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_snd_mss = tp->mss_cache;
info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
- if (sk->sk_state == TCP_LISTEN) {
+ if (info->tcpi_state == TCP_LISTEN) {
info->tcpi_unacked = sk->sk_ack_backlog;
info->tcpi_sacked = sk->sk_max_ack_backlog;
} else {
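
The sk->sk_state reads in lockless contexts (tcp_poll(), tcp_get_info(), and the listener checks in inet_connection_sock.c above) are converted to sk_state_load(), with tcp_set_state() and inet_csk_listen_start() publishing through sk_state_store(). These helpers, added alongside this series, are acquire/release accessors, so a reader that observes TCP_LISTEN also observes the initialisation that preceded the store; tcp_poll() additionally reads the state once into a local so all of its tests agree on a single value. The shape of the pairing:

    /* writer, e.g. tcp_set_state(): publish after setup is complete */
    sk_state_store(sk, TCP_LISTEN);        /* store-release */

    /* lockless reader, e.g. tcp_poll(): load once, test the local copy */
    int state = sk_state_load(sk);         /* load-acquire */
    if (state == TCP_LISTEN)
        return inet_csk_listen_poll(sk);

This is also why the old "Potential race condition" comment could simply be deleted: the acquire/release pair now provides the ordering it warned about.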
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 479f34946177..b31604086edd 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -21,7 +21,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
{
struct tcp_info *info = _info;
- if (sk->sk_state == TCP_LISTEN) {
+ if (sk_state_load(sk) == TCP_LISTEN) {
r->idiag_rqueue = sk->sk_ack_backlog;
r->idiag_wqueue = sk->sk_max_ack_backlog;
} else if (sk->sk_type == SOCK_STREAM) {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fdd88c3803a6..d4c51158470f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2478,6 +2478,9 @@ static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
int newly_acked_sacked = prior_unsacked -
(tp->packets_out - tp->sacked_out);
+ if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
+ return;
+
tp->prr_delivered += newly_acked_sacked;
if (delta < 0) {
u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
@@ -4481,19 +4484,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
{
struct sk_buff *skb;
+ int err = -ENOMEM;
+ int data_len = 0;
bool fragstolen;
if (size == 0)
return 0;
- skb = alloc_skb(size, sk->sk_allocation);
+ if (size > PAGE_SIZE) {
+ int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
+
+ data_len = npages << PAGE_SHIFT;
+ size = data_len + (size & ~PAGE_MASK);
+ }
+ skb = alloc_skb_with_frags(size - data_len, data_len,
+ PAGE_ALLOC_COSTLY_ORDER,
+ &err, sk->sk_allocation);
if (!skb)
goto err;
+ skb_put(skb, size - data_len);
+ skb->data_len = data_len;
+ skb->len = size;
+
if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
goto err_free;
- if (memcpy_from_msg(skb_put(skb, size), msg, size))
+ err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
+ if (err)
goto err_free;
TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
@@ -4509,7 +4527,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
err_free:
kfree_skb(skb);
err:
- return -ENOMEM;
+ return err;
+
}
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
@@ -5667,6 +5686,7 @@ discard:
}
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+ tp->copied_seq = tp->rcv_nxt;
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
/* RFC1323: The window in SYN & SYN/ACK segments is
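
Three tcp_input.c fixes: tcp_cwnd_reduction() bails out early rather than feed PRR a non-positive newly_acked_sacked or a zero prior_cwnd divisor; the SYN handler now advances tp->copied_seq together with rcv_nxt; and tcp_send_rcvq(), which the TCP_REPAIR path uses to inject data straight into the receive queue, no longer issues one potentially huge linear kmalloc for a user-controlled size. Instead it caps the paged portion at MAX_SKB_FRAGS whole pages and lets alloc_skb_with_frags() build a fragmented skb, propagating the real error code. The size split on its own, runnable (PAGE_SHIFT and MAX_SKB_FRAGS hard-coded to common x86-64 values):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_MASK  (~(PAGE_SIZE - 1))
    #define MAX_SKB_FRAGS 17UL

    int main(void)
    {
        unsigned long size = 100000, data_len = 0;

        if (size > PAGE_SIZE) {
            unsigned long npages = size >> PAGE_SHIFT;

            if (npages > MAX_SKB_FRAGS)     /* min_t() in the patch */
                npages = MAX_SKB_FRAGS;
            data_len = npages << PAGE_SHIFT;        /* paged area */
            size = data_len + (size & ~PAGE_MASK);  /* plus linear tail */
        }
        printf("linear=%lu paged=%lu\n", size - data_len, data_len);
        return 0;
    }
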
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 950e28c0cdf2..d8841a2f1569 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -921,7 +921,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
}
md5sig = rcu_dereference_protected(tp->md5sig_info,
- sock_owned_by_user(sk));
+ sock_owned_by_user(sk) ||
+ lockdep_is_held(&sk->sk_lock.slock));
if (!md5sig) {
md5sig = kmalloc(sizeof(*md5sig), gfp);
if (!md5sig)
@@ -1492,7 +1493,7 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
if (likely(sk->sk_rx_dst))
skb_dst_drop(skb);
else
- skb_dst_force(skb);
+ skb_dst_force_safe(skb);
__skb_queue_tail(&tp->ucopy.prequeue, skb);
tp->ucopy.memory += skb->truesize;
@@ -1720,8 +1721,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- if (dst) {
- dst_hold(dst);
+ if (dst && dst_hold_safe(dst)) {
sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
}
@@ -2158,6 +2158,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
__u16 destp = ntohs(inet->inet_dport);
__u16 srcp = ntohs(inet->inet_sport);
int rx_queue;
+ int state;
if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
@@ -2175,17 +2176,18 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
timer_expires = jiffies;
}
- if (sk->sk_state == TCP_LISTEN)
+ state = sk_state_load(sk);
+ if (state == TCP_LISTEN)
rx_queue = sk->sk_ack_backlog;
else
- /*
- * because we dont lock socket, we might find a transient negative value
+ /* Because we don't lock the socket,
+ * we might find a transient negative value.
*/
rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
- i, src, srcp, dest, destp, sk->sk_state,
+ i, src, srcp, dest, destp, state,
tp->write_seq - tp->snd_una,
rx_queue,
timer_active,
@@ -2199,8 +2201,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
- sk->sk_state == TCP_LISTEN ?
- (fastopenq ? fastopenq->max_qlen : 0) :
+ state == TCP_LISTEN ?
+ fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cb7ca569052c..9bfc39ff2285 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3150,7 +3150,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
- int syn_loss = 0, space, err = 0, copied;
+ int syn_loss = 0, space, err = 0;
unsigned long last_syn_loss = 0;
struct sk_buff *syn_data;
@@ -3188,17 +3188,18 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
goto fallback;
syn_data->ip_summed = CHECKSUM_PARTIAL;
memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
- copied = copy_from_iter(skb_put(syn_data, space), space,
- &fo->data->msg_iter);
- if (unlikely(!copied)) {
- kfree_skb(syn_data);
- goto fallback;
- }
- if (copied != space) {
- skb_trim(syn_data, copied);
- space = copied;
+ if (space) {
+ int copied = copy_from_iter(skb_put(syn_data, space), space,
+ &fo->data->msg_iter);
+ if (unlikely(!copied)) {
+ kfree_skb(syn_data);
+ goto fallback;
+ }
+ if (copied != space) {
+ skb_trim(syn_data, copied);
+ space = copied;
+ }
}
-
/* No more data pending in inet_wait_for_connect() */
if (space == fo->size)
fo->data = NULL;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c9c716a483e4..193ba1fa8a9a 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -168,7 +168,7 @@ static int tcp_write_timeout(struct sock *sk)
dst_negative_advice(sk);
if (tp->syn_fastopen || tp->syn_data)
tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
- if (tp->syn_data)
+ if (tp->syn_data && icsk->icsk_retransmits == 1)
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPFASTOPENACTIVEFAIL);
}
@@ -176,6 +176,18 @@ static int tcp_write_timeout(struct sock *sk)
syn_set = true;
} else {
if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
+ /* Some middle-boxes may black-hole Fast Open _after_
+ * the handshake. Therefore we conservatively disable
+ * Fast Open on this path on recurring timeouts with
+ * few or zero bytes acked after Fast Open.
+ */
+ if (tp->syn_data_acked &&
+ tp->bytes_acked <= tp->rx_opt.mss_clamp) {
+ tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
+ if (icsk->icsk_retransmits == sysctl_tcp_retries1)
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENACTIVEFAIL);
+ }
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 24ec14f9825c..c43890848641 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -100,7 +100,6 @@
#include <linux/slab.h>
#include <net/tcp_states.h>
#include <linux/skbuff.h>
-#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/net_namespace.h>
@@ -1026,8 +1025,11 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flow_flags,
faddr, saddr, dport, inet->inet_sport);
- if (!saddr && ipc.oif)
- l3mdev_get_saddr(net, ipc.oif, fl4);
+ if (!saddr && ipc.oif) {
+ err = l3mdev_get_saddr(net, ipc.oif, fl4);
+ if (err < 0)
+ goto out;
+ }
security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 1e0c3c835a63..7b0edb37a115 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -259,7 +259,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
xfrm_dst_ifdown(dst, dev);
}
-static struct dst_ops xfrm4_dst_ops = {
+static struct dst_ops xfrm4_dst_ops_template = {
.family = AF_INET,
.gc = xfrm4_garbage_collect,
.update_pmtu = xfrm4_update_pmtu,
@@ -273,7 +273,7 @@ static struct dst_ops xfrm4_dst_ops = {
static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
.family = AF_INET,
- .dst_ops = &xfrm4_dst_ops,
+ .dst_ops = &xfrm4_dst_ops_template,
.dst_lookup = xfrm4_dst_lookup,
.get_saddr = xfrm4_get_saddr,
.decode_session = _decode_session4,
@@ -295,7 +295,7 @@ static struct ctl_table xfrm4_policy_table[] = {
{ }
};
-static int __net_init xfrm4_net_init(struct net *net)
+static int __net_init xfrm4_net_sysctl_init(struct net *net)
{
struct ctl_table *table;
struct ctl_table_header *hdr;
@@ -323,7 +323,7 @@ err_alloc:
return -ENOMEM;
}
-static void __net_exit xfrm4_net_exit(struct net *net)
+static void __net_exit xfrm4_net_sysctl_exit(struct net *net)
{
struct ctl_table *table;
@@ -335,12 +335,44 @@ static void __net_exit xfrm4_net_exit(struct net *net)
if (!net_eq(net, &init_net))
kfree(table);
}
+#else /* CONFIG_SYSCTL */
+static inline int xfrm4_net_sysctl_init(struct net *net)
+{
+ return 0;
+}
+
+static inline void xfrm4_net_sysctl_exit(struct net *net)
+{
+}
+#endif
+
+static int __net_init xfrm4_net_init(struct net *net)
+{
+ int ret;
+
+ memcpy(&net->xfrm.xfrm4_dst_ops, &xfrm4_dst_ops_template,
+ sizeof(xfrm4_dst_ops_template));
+ ret = dst_entries_init(&net->xfrm.xfrm4_dst_ops);
+ if (ret)
+ return ret;
+
+ ret = xfrm4_net_sysctl_init(net);
+ if (ret)
+ dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
+
+ return ret;
+}
+
+static void __net_exit xfrm4_net_exit(struct net *net)
+{
+ xfrm4_net_sysctl_exit(net);
+ dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
+}
static struct pernet_operations __net_initdata xfrm4_net_ops = {
.init = xfrm4_net_init,
.exit = xfrm4_net_exit,
};
-#endif
static void __init xfrm4_policy_init(void)
{
@@ -349,13 +381,9 @@ static void __init xfrm4_policy_init(void)
void __init xfrm4_init(void)
{
- dst_entries_init(&xfrm4_dst_ops);
-
xfrm4_state_init();
xfrm4_policy_init();
xfrm4_protocol_init();
-#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm4_net_ops);
-#endif
}
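
xfrm4_dst_ops used to be one static instance shared by every network namespace, so its dst entry counter was effectively global, and the pernet ops were registered only under CONFIG_SYSCTL. The patch turns the static into xfrm4_dst_ops_template, copies it into net->xfrm.xfrm4_dst_ops per namespace, splits the sysctl handling into _sysctl_ helpers with no-op stubs when sysctls are off, and registers the pernet ops unconditionally. The init follows the usual reverse-order unwind rule, sketched with hypothetical step names:

    static int __net_init example_net_init(struct net *net)
    {
        int ret;

        ret = step_a(net);    /* here: dst_entries_init() */
        if (ret)
            return ret;
        ret = step_b(net);    /* here: sysctl registration */
        if (ret)
            undo_a(net);      /* unwind the earlier step on failure */
        return ret;
    }

The exit path runs the same steps in reverse, mirroring xfrm4_net_exit() above.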
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d84742f003a9..1f21087accab 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -350,6 +350,12 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
setup_timer(&ndev->rs_timer, addrconf_rs_timer,
(unsigned long)ndev);
memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
+
+ if (ndev->cnf.stable_secret.initialized)
+ ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+ else
+ ndev->addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64;
+
ndev->cnf.mtu6 = dev->mtu;
ndev->cnf.sysctl = NULL;
ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
@@ -2455,7 +2461,7 @@ ok:
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
if (in6_dev->cnf.optimistic_dad &&
!net->ipv6.devconf_all->forwarding && sllao)
- addr_flags = IFA_F_OPTIMISTIC;
+ addr_flags |= IFA_F_OPTIMISTIC;
#endif
/* Do not allow creating too many autoconfigured
@@ -3642,7 +3648,7 @@ static void addrconf_dad_work(struct work_struct *w)
/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
- ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any, NULL);
+ ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any);
out:
in6_ifa_put(ifp);
rtnl_unlock();
@@ -5363,13 +5369,10 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
goto out;
}
- if (!write) {
- err = snprintf(str, sizeof(str), "%pI6",
- &secret->secret);
- if (err >= sizeof(str)) {
- err = -EIO;
- goto out;
- }
+ err = snprintf(str, sizeof(str), "%pI6", &secret->secret);
+ if (err >= sizeof(str)) {
+ err = -EIO;
+ goto out;
}
err = proc_dostring(&lctl, write, buffer, lenp, ppos);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 882124ebb438..a8f6986dcbe5 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -552,7 +552,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
rcu_read_lock();
p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
- if (p && ip6addrlbl_hold(p))
+ if (p && !ip6addrlbl_hold(p))
p = NULL;
lseq = ip6addrlbl_table.seq;
rcu_read_unlock();
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 44bb66bde0e2..9f5137cd604e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -109,6 +109,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
int try_loading_module = 0;
int err;
+ if (protocol < 0 || protocol >= IPPROTO_MAX)
+ return -EINVAL;
+
/* Look for the requested type/protocol pair. */
lookup_protocol:
err = -ESOCKTNOSUPPORT;
@@ -428,9 +431,11 @@ void inet6_destroy_sock(struct sock *sk)
/* Free tx options */
- opt = xchg(&np->opt, NULL);
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
}
EXPORT_SYMBOL_GPL(inet6_destroy_sock);
@@ -659,7 +664,10 @@ int inet6_sk_rebuild_header(struct sock *sk)
fl6.fl6_sport = inet->inet_sport;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
+ &final);
+ rcu_read_unlock();
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -668,7 +676,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
return PTR_ERR(dst);
}
- __ip6_dst_store(sk, dst, NULL, NULL);
+ ip6_dst_store(sk, dst, NULL, NULL);
}
return 0;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index d70b0238f468..517c55b01ba8 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -167,8 +167,10 @@ ipv4_connected:
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- opt = flowlabel ? flowlabel->opt : np->opt;
+ rcu_read_lock();
+ opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
final_p = fl6_update_dst(&fl6, opt, &final);
+ rcu_read_unlock();
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
err = 0;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index ce203b0402be..ea7c4d64a00a 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -727,6 +727,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
*((char **)&opt2->dst1opt) += dif;
if (opt2->srcrt)
*((char **)&opt2->srcrt) += dif;
+ atomic_set(&opt2->refcnt, 1);
}
return opt2;
}
@@ -790,7 +791,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
return ERR_PTR(-ENOBUFS);
memset(opt2, 0, tot_len);
-
+ atomic_set(&opt2->refcnt, 1);
opt2->tot_len = tot_len;
p = (char *)(opt2 + 1);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 36c5a98b0472..0a37ddc7af51 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -834,11 +834,6 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
}
-/*
- * Special lock-class for __icmpv6_sk:
- */
-static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
-
static int __net_init icmpv6_sk_init(struct net *net)
{
struct sock *sk;
@@ -860,15 +855,6 @@ static int __net_init icmpv6_sk_init(struct net *net)
net->ipv6.icmp_sk[i] = sk;
- /*
- * Split off their lock-class, because sk->sk_dst_lock
- * gets used from softirqs, which is safe for
- * __icmpv6_sk (because those never get directly used
- * via userspace syscalls), but unsafe for normal sockets.
- */
- lockdep_set_class(&sk->sk_dst_lock,
- &icmpv6_socket_sk_dst_lock_key);
-
/* Enough space for 2 64K ICMP packets, including
* sk_buff struct overhead.
*/
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 5d1c7cee2cb2..a7ca2cde2ecb 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -78,7 +78,9 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
memset(fl6, 0, sizeof(*fl6));
fl6->flowi6_proto = proto;
fl6->daddr = ireq->ir_v6_rmt_addr;
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
fl6->saddr = ireq->ir_v6_loc_addr;
fl6->flowi6_oif = ireq->ir_iif;
fl6->flowi6_mark = ireq->ir_mark;
@@ -109,14 +111,6 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
static inline
-void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr)
-{
- __ip6_dst_store(sk, dst, daddr, saddr);
-}
-
-static inline
struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
{
return __sk_dst_check(sk, cookie);
@@ -142,14 +136,16 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
fl6->fl6_dport = inet->inet_dport;
security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
dst = __inet6_csk_dst_check(sk, np->dst_cookie);
if (!dst) {
dst = ip6_dst_lookup_flow(sk, fl6, final_p);
if (!IS_ERR(dst))
- __inet6_csk_dst_store(sk, dst, NULL, NULL);
+ ip6_dst_store(sk, dst, NULL, NULL);
}
return dst;
}
@@ -175,7 +171,8 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
/* Restore final destination back after routing done */
fl6.daddr = sk->sk_v6_daddr;
- res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ np->tclass);
rcu_read_unlock();
return res;
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3c7b9310b33f..e5ea177d34c6 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1571,13 +1571,11 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
return -EEXIST;
} else {
t = nt;
-
- ip6gre_tunnel_unlink(ign, t);
- ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
- ip6gre_tunnel_link(ign, t);
- netdev_state_change(dev);
}
+ ip6gre_tunnel_unlink(ign, t);
+ ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+ ip6gre_tunnel_link(ign, t);
return 0;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index eabffbb89795..137fca42aaa6 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -177,7 +177,7 @@ void ip6_tnl_dst_reset(struct ip6_tnl *t)
int i;
for_each_possible_cpu(i)
- ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL);
+ ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
}
EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index ad19136086dd..a10e77103c88 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -118,7 +118,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
int cmd);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt);
+static void mroute_clean_tables(struct mr6_table *mrt, bool all);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
@@ -334,7 +334,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
static void ip6mr_free_table(struct mr6_table *mrt)
{
del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, true);
kfree(mrt);
}
@@ -765,10 +765,6 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
return dev;
failure:
- /* allow the register to be completed before unregistering. */
- rtnl_unlock();
- rtnl_lock();
-
unregister_netdevice(dev);
return NULL;
}
@@ -1542,7 +1538,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr6_table *mrt)
+static void mroute_clean_tables(struct mr6_table *mrt, bool all)
{
int i;
LIST_HEAD(list);
@@ -1552,8 +1548,9 @@ static void mroute_clean_tables(struct mr6_table *mrt)
* Shut down all active vif entries
*/
for (i = 0; i < mrt->maxvif; i++) {
- if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
- mif6_delete(mrt, i, &list);
+ if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+ continue;
+ mif6_delete(mrt, i, &list);
}
unregister_netdevice_many(&list);
@@ -1562,7 +1559,7 @@ static void mroute_clean_tables(struct mr6_table *mrt)
*/
for (i = 0; i < MFC6_LINES; i++) {
list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
- if (c->mfc_flags & MFC_STATIC)
+ if (!all && (c->mfc_flags & MFC_STATIC))
continue;
write_lock_bh(&mrt_lock);
list_del(&c->list);
@@ -1625,7 +1622,7 @@ int ip6mr_sk_done(struct sock *sk)
net->ipv6.devconf_all);
write_unlock_bh(&mrt_lock);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, false);
err = 0;
break;
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 63e6956917c9..4449ad1f8114 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -111,7 +111,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
}
- opt = xchg(&inet6_sk(sk)->opt, opt);
+ opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
+ opt);
sk_dst_reset(sk);
return opt;
@@ -231,9 +232,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
sk->sk_socket->ops = &inet_dgram_ops;
sk->sk_family = PF_INET;
}
- opt = xchg(&np->opt, NULL);
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ opt = xchg((__force struct ipv6_txoptions **)&np->opt,
+ NULL);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
pktopt = xchg(&np->pktoptions, NULL);
kfree_skb(pktopt);
@@ -403,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
break;
- opt = ipv6_renew_options(sk, np->opt, optname,
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ opt = ipv6_renew_options(sk, opt, optname,
(struct ipv6_opt_hdr __user *)optval,
optlen);
if (IS_ERR(opt)) {
@@ -432,8 +437,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = 0;
opt = ipv6_update_options(sk, opt);
sticky_done:
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
break;
}
@@ -486,6 +493,7 @@ sticky_done:
break;
memset(opt, 0, sizeof(*opt));
+ atomic_set(&opt->refcnt, 1);
opt->tot_len = sizeof(*opt) + optlen;
retv = -EFAULT;
if (copy_from_user(opt+1, optval, optlen))
@@ -502,8 +510,10 @@ update:
retv = 0;
opt = ipv6_update_options(sk, opt);
done:
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
break;
}
case IPV6_UNICAST_HOPS:
@@ -1110,10 +1120,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
case IPV6_RTHDR:
case IPV6_DSTOPTS:
{
+ struct ipv6_txoptions *opt;
lock_sock(sk);
- len = ipv6_getsockopt_sticky(sk, np->opt,
- optname, optval, len);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len);
release_sock(sk);
/* check if ipv6_getsockopt_sticky() returns err code */
if (len < 0)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 124338a39e29..5ee56d0a8699 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1651,7 +1651,6 @@ out:
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
} else {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
}
@@ -2015,7 +2014,6 @@ out:
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, type);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
} else
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 3e0f855e1bea..84afb9a77278 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -556,8 +556,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
}
void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
- const struct in6_addr *daddr, const struct in6_addr *saddr,
- struct sk_buff *oskb)
+ const struct in6_addr *daddr, const struct in6_addr *saddr)
{
struct sk_buff *skb;
struct in6_addr addr_buf;
@@ -593,9 +592,6 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
dev->dev_addr);
- if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE) && oskb)
- skb_dst_copy(skb, oskb);
-
ndisc_send_skb(skb, daddr, saddr);
}
@@ -682,12 +678,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
"%s: trying to ucast probe in NUD_INVALID: %pI6\n",
__func__, target);
}
- ndisc_send_ns(dev, target, target, saddr, skb);
+ ndisc_send_ns(dev, target, target, saddr);
} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
neigh_app_ns(neigh);
} else {
addrconf_addr_solict_mult(target, &mcaddr);
- ndisc_send_ns(dev, target, &mcaddr, saddr, skb);
+ ndisc_send_ns(dev, target, &mcaddr, saddr);
}
}
@@ -1187,7 +1183,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
*/
if (!in6_dev->cnf.accept_ra_from_local &&
ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
- NULL, 0)) {
+ in6_dev->dev, 0)) {
ND_PRINTK(2, info,
"RA from local address detected on dev: %s: default router ignored\n",
skb->dev->name);
@@ -1341,7 +1337,7 @@ skip_linkparms:
#ifdef CONFIG_IPV6_ROUTE_INFO
if (!in6_dev->cnf.accept_ra_from_local &&
ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
- NULL, 0)) {
+ in6_dev->dev, 0)) {
ND_PRINTK(2, info,
"RA from local address detected on dev: %s: router info ignored.\n",
skb->dev->name);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index f6a024e141e5..e10a04c9cdc7 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -49,6 +49,7 @@ config NFT_REJECT_IPV6
config NFT_DUP_IPV6
tristate "IPv6 nf_tables packet duplication support"
+ depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV6
help
This module enables IPv6 packet duplication support for nf_tables.
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d5efeb87350e..bab4441ed4e4 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -190,7 +190,7 @@ static void nf_ct_frag6_expire(unsigned long data)
/* Creation primitives. */
static inline struct frag_queue *fq_find(struct net *net, __be32 id,
u32 user, struct in6_addr *src,
- struct in6_addr *dst, u8 ecn)
+ struct in6_addr *dst, int iif, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -200,6 +200,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
arg.user = user;
arg.src = src;
arg.dst = dst;
+ arg.iif = iif;
arg.ecn = ecn;
local_bh_disable();
@@ -601,7 +602,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use
fhdr = (struct frag_hdr *)skb_transport_header(clone);
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
- ip6_frag_ecn(hdr));
+ skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
goto ret_orig;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index dc65ec198f7c..99140986e887 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -733,6 +733,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ipv6_txoptions opt_space;
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
@@ -839,8 +840,10 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!(opt->opt_nflen|opt->opt_flen))
opt = NULL;
}
- if (!opt)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -906,6 +909,7 @@ done:
dst_release(dst);
out:
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
return err < 0 ? err : len;
do_confirm:
dst_confirm(dst);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 44e21a03cfc3..45f5ae51de65 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -108,7 +108,10 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
return fq->id == arg->id &&
fq->user == arg->user &&
ipv6_addr_equal(&fq->saddr, arg->src) &&
- ipv6_addr_equal(&fq->daddr, arg->dst);
+ ipv6_addr_equal(&fq->daddr, arg->dst) &&
+ (arg->iif == fq->iif ||
+ !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
+ IPV6_ADDR_LINKLOCAL)));
}
EXPORT_SYMBOL(ip6_frag_match);
@@ -180,7 +183,7 @@ static void ip6_frag_expire(unsigned long data)
static struct frag_queue *
fq_find(struct net *net, __be32 id, const struct in6_addr *src,
- const struct in6_addr *dst, u8 ecn)
+ const struct in6_addr *dst, int iif, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -190,6 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
arg.user = IP6_DEFRAG_LOCAL_DELIVER;
arg.src = src;
arg.dst = dst;
+ arg.iif = iif;
arg.ecn = ecn;
hash = inet6_hash_frag(id, src, dst);
@@ -551,7 +555,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
}
fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
- ip6_frag_ecn(hdr));
+ skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
if (fq) {
int ret;
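The reassembly hunks add the arrival interface (iif) to the fragment-queue key: fragments whose destination is link-local or multicast only match an existing queue when they arrived on the same interface, since such addresses are only unique per link. A standalone sketch of the predicate (the address classifier and types below are simplified stand-ins):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ADDR_MULTICAST	0x1
#define ADDR_LINKLOCAL	0x2

struct in6 { uint8_t s6[16]; };

/* Simplified classifier: ff00::/8 is multicast, fe80::/10 is link-local. */
static unsigned int addr_type(const struct in6 *a)
{
	if (a->s6[0] == 0xff)
		return ADDR_MULTICAST;
	if (a->s6[0] == 0xfe && (a->s6[1] & 0xc0) == 0x80)
		return ADDR_LINKLOCAL;
	return 0;
}

static bool frag_match(uint32_t q_id, int q_iif, const struct in6 *q_dst,
		       uint32_t id, int iif, const struct in6 *dst)
{
	if (q_id != id || memcmp(q_dst, dst, sizeof(*dst)))
		return false;
	/* Scoped destinations must also match the arrival interface. */
	return iif == q_iif ||
	       !(addr_type(dst) & (ADDR_MULTICAST | ADDR_LINKLOCAL));
}

int main(void)
{
	struct in6 ll = { .s6 = { 0xfe, 0x80 } };

	printf("same iif: %d\n", frag_match(1, 2, &ll, 1, 2, &ll));	/* 1 */
	printf("cross iif: %d\n", frag_match(1, 2, &ll, 1, 3, &ll));	/* 0 */
	return 0;
}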
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c8bc9b4ac328..826e6aa44f8d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -404,6 +404,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
}
}
+static bool __rt6_check_expired(const struct rt6_info *rt)
+{
+ if (rt->rt6i_flags & RTF_EXPIRES)
+ return time_after(jiffies, rt->dst.expires);
+ else
+ return false;
+}
+
static bool rt6_check_expired(const struct rt6_info *rt)
{
if (rt->rt6i_flags & RTF_EXPIRES) {
@@ -515,7 +523,7 @@ static void rt6_probe_deferred(struct work_struct *w)
container_of(w, struct __rt6_probe_work, work);
addrconf_addr_solict_mult(&work->target, &mcaddr);
- ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
+ ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
dev_put(work->dev);
kfree(work);
}
@@ -1252,7 +1260,8 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
- if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
+ if (!__rt6_check_expired(rt) &&
+ rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
rt6_check((struct rt6_info *)(rt->dst.from), cookie))
return &rt->dst;
else
@@ -1272,7 +1281,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
rt6_dst_from_metrics_check(rt);
- if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
+ if (rt->rt6i_flags & RTF_PCPU ||
+ (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
return rt6_dst_from_check(rt, cookie);
else
return rt6_check(rt, cookie);
@@ -1322,6 +1332,12 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
+static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
+{
+ return !(rt->rt6i_flags & RTF_CACHE) &&
+ (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
+}
+
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
const struct ipv6hdr *iph, u32 mtu)
{
@@ -1335,7 +1351,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (mtu >= dst_mtu(dst))
return;
- if (rt6->rt6i_flags & RTF_CACHE) {
+ if (!rt6_cache_allowed_for_pmtu(rt6)) {
rt6_do_update_pmtu(rt6, mtu);
} else {
const struct in6_addr *daddr, *saddr;
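route.c gains __rt6_check_expired(), a bare expiry test used by rt6_dst_from_check() to stop handing out cached dsts whose RTF_EXPIRES deadline has passed. The kernel compares jiffies with time_after(), which stays correct across counter wraparound; a sketch of the same idea over an unsigned tick counter (the struct and field names are simplified):

#include <stdbool.h>
#include <stdio.h>

#define RTF_EXPIRES 0x00400000

/* Wraparound-safe "a is after b" over a free-running unsigned counter,
 * mirroring the kernel's time_after(). */
static bool time_after(unsigned long a, unsigned long b)
{
	return (long)(b - a) < 0;
}

struct rt6 {
	unsigned int flags;
	unsigned long expires;
};

static bool rt6_expired(const struct rt6 *rt, unsigned long now)
{
	if (rt->flags & RTF_EXPIRES)
		return time_after(now, rt->expires);
	return false;	/* no RTF_EXPIRES: the entry never times out here */
}

int main(void)
{
	struct rt6 rt = { .flags = RTF_EXPIRES, .expires = 100 };

	printf("at 99: %d, at 101: %d\n",
	       rt6_expired(&rt, 99), rt6_expired(&rt, 101));	/* 0, 1 */
	return 0;
}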
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb8f2fa1c7fb..eaf7ac496d50 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -222,7 +222,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
fl6.daddr = ireq->ir_v6_rmt_addr;
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
fl6.saddr = ireq->ir_v6_loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = ireq->ir_mark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5baa8e754e41..6b8a8a9091fa 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -93,10 +93,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- if (dst) {
+ if (dst && dst_hold_safe(dst)) {
const struct rt6_info *rt = (const struct rt6_info *)dst;
- dst_hold(dst);
sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
@@ -120,6 +119,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
+ struct ipv6_txoptions *opt;
struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
@@ -235,7 +235,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ final_p = fl6_update_dst(&fl6, opt, &final);
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
@@ -255,7 +256,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
sk->sk_gso_type = SKB_GSO_TCPV6;
- __ip6_dst_store(sk, dst, NULL, NULL);
+ ip6_dst_store(sk, dst, NULL, NULL);
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp &&
@@ -263,9 +264,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
tcp_fetch_timewait_stamp(sk, dst);
icsk->icsk_ext_hdr_len = 0;
- if (np->opt)
- icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
- np->opt->opt_nflen);
+ if (opt)
+ icsk->icsk_ext_hdr_len = opt->opt_flen +
+ opt->opt_nflen;
tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
@@ -461,7 +462,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
if (np->repflow && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
- err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
+ err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
+ np->tclass);
err = net_xmit_eval(err);
}
@@ -972,6 +974,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
struct inet_request_sock *ireq;
struct ipv6_pinfo *newnp;
const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
struct tcp6_sock *newtcp6sk;
struct inet_sock *newinet;
struct tcp_sock *newtp;
@@ -1056,7 +1059,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
*/
newsk->sk_gso_type = SKB_GSO_TCPV6;
- __ip6_dst_store(newsk, dst, NULL, NULL);
+ ip6_dst_store(newsk, dst, NULL, NULL);
inet6_sk_rx_dst_set(newsk, skb);
newtcp6sk = (struct tcp6_sock *)newsk;
@@ -1098,13 +1101,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
but we do one more thing there: reattach optmem
to newsk.
*/
- if (np->opt)
- newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ }
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newnp->opt)
- inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
- newnp->opt->opt_flen);
+ if (opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+ opt->opt_flen;
tcp_ca_openreq_child(newsk, dst);
@@ -1690,6 +1695,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
const struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
+ int rx_queue;
+ int state;
dest = &sp->sk_v6_daddr;
src = &sp->sk_v6_rcv_saddr;
@@ -1710,6 +1717,15 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
timer_expires = jiffies;
}
+ state = sk_state_load(sp);
+ if (state == TCP_LISTEN)
+ rx_queue = sp->sk_ack_backlog;
+ else
+ /* Because we don't lock the socket,
+ * we might find a transient negative value.
+ */
+ rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
+
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
"%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
@@ -1718,9 +1734,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
src->s6_addr32[2], src->s6_addr32[3], srcp,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
- sp->sk_state,
- tp->write_seq-tp->snd_una,
- (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
+ state,
+ tp->write_seq - tp->snd_una,
+ rx_queue,
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
@@ -1732,7 +1748,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
- sp->sk_state == TCP_LISTEN ?
+ state == TCP_LISTEN ?
fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
);
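In get_tcp6_sock(), the socket state is now snapshotted once with sk_state_load() and the receive-queue depth is clamped at zero, because /proc readers do not take the socket lock and can observe rcv_nxt and copied_seq mid-update. The clamp in miniature (plain loads here; the kernel uses its lockless accessors):

#include <stdint.h>
#include <stdio.h>

/* Lockless readers can see copied_seq ahead of rcv_nxt for a moment;
 * clamp the transient negative difference to zero. */
static int rx_queue_depth(uint32_t rcv_nxt, uint32_t copied_seq)
{
	int d = (int)(rcv_nxt - copied_seq);

	return d > 0 ? d : 0;
}

int main(void)
{
	printf("%d\n", rx_queue_depth(1004, 1000));	/* 4 */
	printf("%d\n", rx_queue_depth(1000, 1004));	/* racy snapshot: 0 */
	return 0;
}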
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 01bcb49619ee..9da3287a3923 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1110,6 +1110,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
struct ipv6_txoptions *opt = NULL;
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct flowi6 fl6;
struct dst_entry *dst;
@@ -1263,8 +1264,10 @@ do_udp_sendmsg:
opt = NULL;
connected = 0;
}
- if (!opt)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -1373,6 +1376,7 @@ release_dst:
out:
dst_release(dst);
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
if (!err)
return len;
/*
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 5643423fe67a..c074771a10f7 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -279,7 +279,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
xfrm_dst_ifdown(dst, dev);
}
-static struct dst_ops xfrm6_dst_ops = {
+static struct dst_ops xfrm6_dst_ops_template = {
.family = AF_INET6,
.gc = xfrm6_garbage_collect,
.update_pmtu = xfrm6_update_pmtu,
@@ -293,7 +293,7 @@ static struct dst_ops xfrm6_dst_ops = {
static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.family = AF_INET6,
- .dst_ops = &xfrm6_dst_ops,
+ .dst_ops = &xfrm6_dst_ops_template,
.dst_lookup = xfrm6_dst_lookup,
.get_saddr = xfrm6_get_saddr,
.decode_session = _decode_session6,
@@ -325,7 +325,7 @@ static struct ctl_table xfrm6_policy_table[] = {
{ }
};
-static int __net_init xfrm6_net_init(struct net *net)
+static int __net_init xfrm6_net_sysctl_init(struct net *net)
{
struct ctl_table *table;
struct ctl_table_header *hdr;
@@ -353,7 +353,7 @@ err_alloc:
return -ENOMEM;
}
-static void __net_exit xfrm6_net_exit(struct net *net)
+static void __net_exit xfrm6_net_sysctl_exit(struct net *net)
{
struct ctl_table *table;
@@ -365,24 +365,52 @@ static void __net_exit xfrm6_net_exit(struct net *net)
if (!net_eq(net, &init_net))
kfree(table);
}
+#else /* CONFIG_SYSCTL */
+static inline int xfrm6_net_sysctl_init(struct net *net)
+{
+ return 0;
+}
+
+static inline void xfrm6_net_sysctl_exit(struct net *net)
+{
+}
+#endif
+
+static int __net_init xfrm6_net_init(struct net *net)
+{
+ int ret;
+
+ memcpy(&net->xfrm.xfrm6_dst_ops, &xfrm6_dst_ops_template,
+ sizeof(xfrm6_dst_ops_template));
+ ret = dst_entries_init(&net->xfrm.xfrm6_dst_ops);
+ if (ret)
+ return ret;
+
+ ret = xfrm6_net_sysctl_init(net);
+ if (ret)
+ dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
+
+ return ret;
+}
+
+static void __net_exit xfrm6_net_exit(struct net *net)
+{
+ xfrm6_net_sysctl_exit(net);
+ dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
+}
static struct pernet_operations xfrm6_net_ops = {
.init = xfrm6_net_init,
.exit = xfrm6_net_exit,
};
-#endif
int __init xfrm6_init(void)
{
int ret;
- dst_entries_init(&xfrm6_dst_ops);
-
ret = xfrm6_policy_init();
- if (ret) {
- dst_entries_destroy(&xfrm6_dst_ops);
+ if (ret)
goto out;
- }
ret = xfrm6_state_init();
if (ret)
goto out_policy;
@@ -391,9 +419,7 @@ int __init xfrm6_init(void)
if (ret)
goto out_state;
-#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm6_net_ops);
-#endif
out:
return ret;
out_state:
@@ -405,11 +431,8 @@ out_policy:
void xfrm6_fini(void)
{
-#ifdef CONFIG_SYSCTL
unregister_pernet_subsys(&xfrm6_net_ops);
-#endif
xfrm6_protocol_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
- dst_entries_destroy(&xfrm6_dst_ops);
}
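The xfrm6 rework replaces the single global xfrm6_dst_ops with a template that every network namespace copies in xfrm6_net_init(), giving each namespace its own dst entry counters, and splits the sysctl setup into _sysctl_init/_sysctl_exit helpers that become empty stubs without CONFIG_SYSCTL so the pernet ops can be registered unconditionally. The template-copy pattern in miniature (the struct names are illustrative):

#include <stdio.h>
#include <string.h>

struct dst_ops {
	int family;
	long entries;	/* per-instance counter state */
};

static const struct dst_ops dst_ops_template = { .family = 10 /* AF_INET6 */ };

struct netns { struct dst_ops ops; };

static int ns_init(struct netns *ns)
{
	/* Every namespace starts from the shared template and then
	 * owns a private copy, including private counters. */
	memcpy(&ns->ops, &dst_ops_template, sizeof(dst_ops_template));
	return 0;
}

int main(void)
{
	struct netns a, b;

	ns_init(&a);
	ns_init(&b);
	a.ops.entries++;	/* does not leak into b's counters */
	printf("a=%ld b=%ld\n", a.ops.entries, b.ops.entries);
	return 0;
}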
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index e6aa48b5395c..923abd6b3064 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1086,6 +1086,9 @@ static int irda_create(struct net *net, struct socket *sock, int protocol,
struct sock *sk;
struct irda_sock *self;
+ if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ return -EINVAL;
+
if (net != &init_net)
return -EAFNOSUPPORT;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index fcb2752419c6..435608c4306d 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1483,7 +1483,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
if (sock_writeable(sk) && iucv_below_msglim(sk))
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
return mask;
}
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index aca38d8aed8e..a2c8747d2936 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -486,6 +486,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct dst_entry *dst = NULL;
@@ -575,8 +576,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
opt = NULL;
}
- if (opt == NULL)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -631,6 +634,7 @@ done:
dst_release(dst);
out:
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
return err < 0 ? err : len;
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index a758eb84e8f0..ff757181b0a8 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -500,7 +500,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
/* send AddBA request */
ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
tid_tx->dialog_token, start_seq_num,
- local->hw.max_tx_aggregation_subframes,
+ IEEE80211_MAX_AMPDU_BUF,
tid_tx->timeout);
}
@@ -926,6 +926,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK;
tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
+ buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes);
mutex_lock(&sta->ampdu_mlme.mtx);
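The aggregation fix advertises the spec-maximum IEEE80211_MAX_AMPDU_BUF in the ADDBA request and instead clamps the peer's response to the local hardware limit, so an oversized answer cannot exceed what the driver can buffer. In miniature (the constants below are illustrative):

#include <stdio.h>

#define MAX_AMPDU_BUF 64u	/* spec maximum, advertised in the request */

static unsigned int clamp_buf_size(unsigned int peer, unsigned int hw_max)
{
	return peer < hw_max ? peer : hw_max;
}

int main(void)
{
	unsigned int hw_max = 32;	/* what the local hardware supports */

	printf("%u\n", clamp_buf_size(MAX_AMPDU_BUF, hw_max));	/* 32 */
	printf("%u\n", clamp_buf_size(16, hw_max));		/* 16 */
	return 0;
}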
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index c2bd1b6a6922..c12f348138ac 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1169,8 +1169,7 @@ static int sta_apply_parameters(struct ieee80211_local *local,
* rc isn't initialized here yet, so ignore it
*/
__ieee80211_vht_handle_opmode(sdata, sta,
- params->opmode_notif,
- band, false);
+ params->opmode_notif, band);
}
if (ieee80211_vif_is_mesh(&sdata->vif))
@@ -3454,8 +3453,12 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
goto out_unlock;
}
} else {
- /* for cookie below */
- ack_skb = skb;
+ /* Assign a dummy non-zero cookie; it's not sent to
+ * userspace in this case but we rely on its value
+ * internally in the need_offchan case to distinguish
+ * mgmt-tx from remain-on-channel.
+ */
+ *cookie = 0xffffffff;
}
if (!need_offchan) {
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index d832bd59236b..5322b4c71630 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1709,10 +1709,10 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta);
void ieee80211_sta_set_rx_nss(struct sta_info *sta);
u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, u8 opmode,
- enum ieee80211_band band, bool nss_only);
+ enum ieee80211_band band);
void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, u8 opmode,
- enum ieee80211_band band, bool nss_only);
+ enum ieee80211_band band);
void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta_vht_cap *vht_cap);
void ieee80211_get_vht_mask_from_cap(__le16 vht_cap,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index d0dc1bfaeec2..c9e325d2e120 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -76,7 +76,8 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata,
bool update_bss)
{
- if (__ieee80211_recalc_txpower(sdata) || update_bss)
+ if (__ieee80211_recalc_txpower(sdata) ||
+ (update_bss && ieee80211_sdata_running(sdata)))
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER);
}
@@ -1861,6 +1862,7 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata)
unregister_netdevice(sdata->dev);
} else {
cfg80211_unregister_wdev(&sdata->wdev);
+ ieee80211_teardown_sdata(sdata);
kfree(sdata);
}
}
@@ -1870,7 +1872,6 @@ void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata)
if (WARN_ON_ONCE(!test_bit(SDATA_STATE_RUNNING, &sdata->state)))
return;
ieee80211_do_stop(sdata, true);
- ieee80211_teardown_sdata(sdata);
}
void ieee80211_remove_interfaces(struct ieee80211_local *local)
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 858f6b1cb149..175ffcf7fb06 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -541,8 +541,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
NL80211_FEATURE_HT_IBSS |
NL80211_FEATURE_VIF_TXPOWER |
NL80211_FEATURE_MAC_ON_CREATE |
- NL80211_FEATURE_USERSPACE_MPM |
- NL80211_FEATURE_FULL_AP_CLIENT_STATE;
+ NL80211_FEATURE_USERSPACE_MPM;
if (!ops->hw_scan)
wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index b890e225a8f1..b3b44a5dd375 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -779,10 +779,8 @@ void mesh_plink_broken(struct sta_info *sta)
static void mesh_path_node_reclaim(struct rcu_head *rp)
{
struct mpath_node *node = container_of(rp, struct mpath_node, rcu);
- struct ieee80211_sub_if_data *sdata = node->mpath->sdata;
del_timer_sync(&node->mpath->timer);
- atomic_dec(&sdata->u.mesh.mpaths);
kfree(node->mpath);
kfree(node);
}
@@ -790,8 +788,9 @@ static void mesh_path_node_reclaim(struct rcu_head *rp)
/* needs to be called with the corresponding hashwlock taken */
static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node)
{
- struct mesh_path *mpath;
- mpath = node->mpath;
+ struct mesh_path *mpath = node->mpath;
+ struct ieee80211_sub_if_data *sdata = node->mpath->sdata;
+
spin_lock(&mpath->state_lock);
mpath->flags |= MESH_PATH_RESOLVING;
if (mpath->is_gate)
@@ -799,6 +798,7 @@ static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node)
hlist_del_rcu(&node->list);
call_rcu(&node->rcu, mesh_path_node_reclaim);
spin_unlock(&mpath->state_lock);
+ atomic_dec(&sdata->u.mesh.mpaths);
atomic_dec(&tbl->entries);
}
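The mesh-path fix moves the atomic_dec() of the per-sdata mpath counter out of the RCU reclaim callback and into __mesh_path_del(): by the time the callback runs, nothing beyond the freed node itself may safely be dereferenced. A sketch of the same split, with a plain deferred-free list standing in for call_rcu() and the grace period:

#include <stdlib.h>

struct counters { int mpaths; };

struct node {
	struct counters *owner;	/* may already be gone at reclaim time */
	struct node *next;	/* deferred-free list, stands in for RCU */
};

static struct node *deferred;

/* Reclaim touches nothing but the node itself. */
static void node_reclaim(struct node *n)
{
	free(n);
}

static void node_del(struct node *n)
{
	n->owner->mpaths--;	/* adjust owner state now, not in reclaim */
	n->next = deferred;	/* stands in for call_rcu() */
	deferred = n;
}

static void run_deferred(void)	/* stands in for the grace period ending */
{
	while (deferred) {
		struct node *n = deferred;

		deferred = n->next;
		node_reclaim(n);
	}
}

int main(void)
{
	struct counters cs = { .mpaths = 1 };
	struct node *n = malloc(sizeof(*n));

	if (!n)
		return 1;
	n->owner = &cs;
	node_del(n);
	run_deferred();
	return cs.mpaths;	/* 0 */
}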
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b140cc6651f4..3aa04344942b 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1379,21 +1379,26 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
*/
if (has_80211h_pwr &&
(!has_cisco_pwr || pwr_level_80211h <= pwr_level_cisco)) {
+ new_ap_level = pwr_level_80211h;
+
+ if (sdata->ap_power_level == new_ap_level)
+ return 0;
+
sdata_dbg(sdata,
"Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n",
pwr_level_80211h, chan_pwr, pwr_reduction_80211h,
sdata->u.mgd.bssid);
- new_ap_level = pwr_level_80211h;
} else { /* has_cisco_pwr is always true here. */
+ new_ap_level = pwr_level_cisco;
+
+ if (sdata->ap_power_level == new_ap_level)
+ return 0;
+
sdata_dbg(sdata,
"Limiting TX power to %d dBm as advertised by %pM\n",
pwr_level_cisco, sdata->u.mgd.bssid);
- new_ap_level = pwr_level_cisco;
}
- if (sdata->ap_power_level == new_ap_level)
- return 0;
-
sdata->ap_power_level = new_ap_level;
if (__ieee80211_recalc_txpower(sdata))
return BSS_CHANGED_TXPOWER;
@@ -3575,7 +3580,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
if (sta && elems.opmode_notif)
ieee80211_vht_handle_opmode(sdata, sta, *elems.opmode_notif,
- rx_status->band, true);
+ rx_status->band);
mutex_unlock(&local->sta_mtx);
changed |= ieee80211_handle_pwr_constr(sdata, chan, mgmt,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8bae5de0dc44..82af407fea7a 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2736,8 +2736,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
ieee80211_vht_handle_opmode(rx->sdata, rx->sta,
- opmode, status->band,
- false);
+ opmode, status->band);
goto handled;
}
default:
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 4aeca4b0c3cb..a413e52f7691 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -597,8 +597,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
/* We need to ensure power level is at max for scanning. */
ieee80211_hw_config(local, 0);
- if ((req->channels[0]->flags &
- IEEE80211_CHAN_NO_IR) ||
+ if ((req->channels[0]->flags & (IEEE80211_CHAN_NO_IR |
+ IEEE80211_CHAN_RADAR)) ||
!req->n_ssids) {
next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
} else {
@@ -645,7 +645,7 @@ ieee80211_scan_get_channel_time(struct ieee80211_channel *chan)
* TODO: channel switching also consumes quite some time,
* add that delay as well to get a better estimation
*/
- if (chan->flags & IEEE80211_CHAN_NO_IR)
+ if (chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR))
return IEEE80211_PASSIVE_CHANNEL_TIME;
return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME;
}
@@ -777,7 +777,8 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
*
* In any case, it is not necessary for a passive scan.
*/
- if (chan->flags & IEEE80211_CHAN_NO_IR || !scan_req->n_ssids) {
+ if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) ||
+ !scan_req->n_ssids) {
*next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
local->next_scan_state = SCAN_DECISION;
return;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 74058020b7d6..33344f5a66a8 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1641,6 +1641,29 @@ void ieee80211_stop_device(struct ieee80211_local *local)
drv_stop(local);
}
+static void ieee80211_flush_completed_scan(struct ieee80211_local *local,
+ bool aborted)
+{
+ /* It's possible that we don't handle the scan completion in
+ * time during suspend, so if it's still marked as completed
+ * here, queue the work and flush it to clean things up.
+ * Instead of calling the worker function directly here, we
+ * really queue it to avoid potential races with other flows
+ * scheduling the same work.
+ */
+ if (test_bit(SCAN_COMPLETED, &local->scanning)) {
+ /* If coming from reconfiguration failure, abort the scan so
+ * we don't attempt to continue a partial HW scan - which is
+ * possible otherwise if (e.g.) the 2.4 GHz portion was the
+ * completed scan, and a 5 GHz portion is still pending.
+ */
+ if (aborted)
+ set_bit(SCAN_ABORTED, &local->scanning);
+ ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
+ flush_delayed_work(&local->scan_work);
+ }
+}
+
static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
@@ -1660,6 +1683,8 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
local->suspended = false;
local->in_reconfig = false;
+ ieee80211_flush_completed_scan(local, true);
+
/* scheduled scan clearly can't be running any more, but tell
* cfg80211 and clear local state
*/
@@ -1698,6 +1723,27 @@ static void ieee80211_assign_chanctx(struct ieee80211_local *local,
mutex_unlock(&local->chanctx_mtx);
}
+static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct sta_info *sta;
+
+ /* add STAs back */
+ mutex_lock(&local->sta_mtx);
+ list_for_each_entry(sta, &local->sta_list, list) {
+ enum ieee80211_sta_state state;
+
+ if (!sta->uploaded || sta->sdata != sdata)
+ continue;
+
+ for (state = IEEE80211_STA_NOTEXIST;
+ state < sta->sta_state; state++)
+ WARN_ON(drv_sta_state(local, sta->sdata, sta, state,
+ state + 1));
+ }
+ mutex_unlock(&local->sta_mtx);
+}
+
int ieee80211_reconfig(struct ieee80211_local *local)
{
struct ieee80211_hw *hw = &local->hw;
@@ -1833,50 +1879,11 @@ int ieee80211_reconfig(struct ieee80211_local *local)
WARN_ON(drv_add_chanctx(local, ctx));
mutex_unlock(&local->chanctx_mtx);
- list_for_each_entry(sdata, &local->interfaces, list) {
- if (!ieee80211_sdata_running(sdata))
- continue;
- ieee80211_assign_chanctx(local, sdata);
- }
-
sdata = rtnl_dereference(local->monitor_sdata);
if (sdata && ieee80211_sdata_running(sdata))
ieee80211_assign_chanctx(local, sdata);
}
- /* add STAs back */
- mutex_lock(&local->sta_mtx);
- list_for_each_entry(sta, &local->sta_list, list) {
- enum ieee80211_sta_state state;
-
- if (!sta->uploaded)
- continue;
-
- /* AP-mode stations will be added later */
- if (sta->sdata->vif.type == NL80211_IFTYPE_AP)
- continue;
-
- for (state = IEEE80211_STA_NOTEXIST;
- state < sta->sta_state; state++)
- WARN_ON(drv_sta_state(local, sta->sdata, sta, state,
- state + 1));
- }
- mutex_unlock(&local->sta_mtx);
-
- /* reconfigure tx conf */
- if (hw->queues >= IEEE80211_NUM_ACS) {
- list_for_each_entry(sdata, &local->interfaces, list) {
- if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- sdata->vif.type == NL80211_IFTYPE_MONITOR ||
- !ieee80211_sdata_running(sdata))
- continue;
-
- for (i = 0; i < IEEE80211_NUM_ACS; i++)
- drv_conf_tx(local, sdata, i,
- &sdata->tx_conf[i]);
- }
- }
-
/* reconfigure hardware */
ieee80211_hw_config(local, ~0);
@@ -1889,6 +1896,22 @@ int ieee80211_reconfig(struct ieee80211_local *local)
if (!ieee80211_sdata_running(sdata))
continue;
+ ieee80211_assign_chanctx(local, sdata);
+
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_AP_VLAN:
+ case NL80211_IFTYPE_MONITOR:
+ break;
+ default:
+ ieee80211_reconfig_stations(sdata);
+ /* fall through */
+ case NL80211_IFTYPE_AP: /* AP stations are handled later */
+ for (i = 0; i < IEEE80211_NUM_ACS; i++)
+ drv_conf_tx(local, sdata, i,
+ &sdata->tx_conf[i]);
+ break;
+ }
+
/* common change flags for all interface types */
changed = BSS_CHANGED_ERP_CTS_PROT |
BSS_CHANGED_ERP_PREAMBLE |
@@ -2074,17 +2097,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
mb();
local->resuming = false;
- /* It's possible that we don't handle the scan completion in
- * time during suspend, so if it's still marked as completed
- * here, queue the work and flush it to clean things up.
- * Instead of calling the worker function directly here, we
- * really queue it to avoid potential races with other flows
- * scheduling the same work.
- */
- if (test_bit(SCAN_COMPLETED, &local->scanning)) {
- ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
- flush_delayed_work(&local->scan_work);
- }
+ ieee80211_flush_completed_scan(local, false);
if (local->open_count && !reconfig_due_to_wowlan)
drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_SUSPEND);
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index ff1c798921a6..c38b2f07a919 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -378,7 +378,7 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta)
u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, u8 opmode,
- enum ieee80211_band band, bool nss_only)
+ enum ieee80211_band band)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
@@ -401,9 +401,6 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
changed |= IEEE80211_RC_NSS_CHANGED;
}
- if (nss_only)
- return changed;
-
switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ:
sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20;
@@ -430,13 +427,12 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, u8 opmode,
- enum ieee80211_band band, bool nss_only)
+ enum ieee80211_band band)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
- u32 changed = __ieee80211_vht_handle_opmode(sdata, sta, opmode,
- band, nss_only);
+ u32 changed = __ieee80211_vht_handle_opmode(sdata, sta, opmode, band);
if (changed > 0)
rate_control_rate_update(local, sband, sta, changed);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index c70d750148b6..c32fc411a911 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -27,6 +27,8 @@
*/
#define MAX_MP_SELECT_LABELS 4
+#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
+
static int zero = 0;
static int label_limit = (1 << 20) - 1;
@@ -317,7 +319,13 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
}
}
- err = neigh_xmit(nh->nh_via_table, out_dev, mpls_nh_via(rt, nh), skb);
+ /* If via wasn't specified then send out using device address */
+ if (nh->nh_via_table == MPLS_NEIGH_TABLE_UNSPEC)
+ err = neigh_xmit(NEIGH_LINK_TABLE, out_dev,
+ out_dev->dev_addr, skb);
+ else
+ err = neigh_xmit(nh->nh_via_table, out_dev,
+ mpls_nh_via(rt, nh), skb);
if (err)
net_dbg_ratelimited("%s: packet transmission failed: %d\n",
__func__, err);
@@ -534,6 +542,10 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt,
if (!mpls_dev_get(dev))
goto errout;
+ if ((nh->nh_via_table == NEIGH_LINK_TABLE) &&
+ (dev->addr_len != nh->nh_via_alen))
+ goto errout;
+
RCU_INIT_POINTER(nh->nh_dev, dev);
return 0;
@@ -592,10 +604,14 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt,
goto errout;
}
- err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
- __mpls_nh_via(rt, nh));
- if (err)
- goto errout;
+ if (via) {
+ err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
+ __mpls_nh_via(rt, nh));
+ if (err)
+ goto errout;
+ } else {
+ nh->nh_via_table = MPLS_NEIGH_TABLE_UNSPEC;
+ }
err = mpls_nh_assign_dev(net, rt, nh, oif);
if (err)
@@ -677,9 +693,6 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg,
nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST);
}
- if (!nla_via)
- goto errout;
-
err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
rtnh->rtnh_ifindex, nla_via,
nla_newdst);
@@ -1118,6 +1131,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->rc_label = LABEL_NOT_SPECIFIED;
cfg->rc_protocol = rtm->rtm_protocol;
+ cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC;
cfg->rc_nlflags = nlh->nlmsg_flags;
cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->rc_nlinfo.nlh = nlh;
@@ -1231,7 +1245,8 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_labels(skb, RTA_NEWDST, nh->nh_labels,
nh->nh_label))
goto nla_put_failure;
- if (nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
+ if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
+ nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
nh->nh_via_alen))
goto nla_put_failure;
dev = rtnl_dereference(nh->nh_dev);
@@ -1257,7 +1272,8 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
nh->nh_labels,
nh->nh_label))
goto nla_put_failure;
- if (nla_put_via(skb, nh->nh_via_table,
+ if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
+ nla_put_via(skb, nh->nh_via_table,
mpls_nh_via(rt, nh),
nh->nh_via_alen))
goto nla_put_failure;
@@ -1319,7 +1335,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
if (nh->nh_dev)
payload += nla_total_size(4); /* RTA_OIF */
- payload += nla_total_size(2 + nh->nh_via_alen); /* RTA_VIA */
+ if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) /* RTA_VIA */
+ payload += nla_total_size(2 + nh->nh_via_alen);
if (nh->nh_labels) /* RTA_NEWDST */
payload += nla_total_size(nh->nh_labels * 4);
} else {
@@ -1328,7 +1345,9 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
for_nexthops(rt) {
nhsize += nla_total_size(sizeof(struct rtnexthop));
- nhsize += nla_total_size(2 + nh->nh_via_alen);
+ /* RTA_VIA */
+ if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC)
+ nhsize += nla_total_size(2 + nh->nh_via_alen);
if (nh->nh_labels)
nhsize += nla_total_size(nh->nh_labels * 4);
} endfor_nexthops(rt);
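In af_mpls.c, RTA_VIA becomes optional: a route configured without it stores the out-of-range sentinel MPLS_NEIGH_TABLE_UNSPEC in nh_via_table, the forwarding path then falls back to the output device's own link-layer address, and the dump/size paths emit RTA_VIA only when the sentinel is absent. The sentinel pattern in miniature (the table IDs and helper names are illustrative):

#include <stdio.h>

enum { NEIGH_ARP_TABLE, NEIGH_ND_TABLE, NEIGH_LINK_TABLE };
#define NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)	/* "no via given" */

struct nexthop {
	int via_table;
	const char *via;
};

static const char *xmit_dest(const struct nexthop *nh, const char *dev_addr)
{
	/* No via attribute: send toward the device's own address. */
	if (nh->via_table == NEIGH_TABLE_UNSPEC)
		return dev_addr;
	return nh->via;
}

int main(void)
{
	struct nexthop with_via = { NEIGH_ARP_TABLE, "10.0.0.2" };
	struct nexthop no_via = { NEIGH_TABLE_UNSPEC, NULL };

	printf("%s\n", xmit_dest(&with_via, "dev-lladdr"));	/* 10.0.0.2 */
	printf("%s\n", xmit_dest(&no_via, "dev-lladdr"));	/* dev-lladdr */
	return 0;
}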
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 67591aef9cae..64afd3d0b144 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -54,10 +54,10 @@ int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb)
unsigned int ttl;
/* Obtain the ttl */
- if (skb->protocol == htons(ETH_P_IP)) {
+ if (dst->ops->family == AF_INET) {
ttl = ip_hdr(skb)->ttl;
rt = (struct rtable *)dst;
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ } else if (dst->ops->family == AF_INET6) {
ttl = ipv6_hdr(skb)->hop_limit;
rt6 = (struct rt6_info *)dst;
} else {
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e22349ea7256..4692782b5280 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -869,7 +869,7 @@ config NETFILTER_XT_TARGET_TEE
depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV4
- select NF_DUP_IPV6 if IP6_NF_IPTABLES
+ select NF_DUP_IPV6 if IP6_NF_IPTABLES != n
---help---
This option adds a "TEE" target with which a packet can be cloned and
this clone be rerouted to another nexthop.
@@ -882,7 +882,7 @@ config NETFILTER_XT_TARGET_TPROXY
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
- select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
+ select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
help
This option adds a `TPROXY' target, which is somewhat similar to
REDIRECT. It can only be used in the mangle table and is useful
@@ -1375,7 +1375,7 @@ config NETFILTER_XT_MATCH_SOCKET
depends on IPV6 || IPV6=n
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
select NF_DEFRAG_IPV4
- select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
+ select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
help
This option adds a `socket' match, which can be used to match
packets for which a TCP or UDP socket lookup finds a valid socket.
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index d05e759ed0fa..b0bc475f641e 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -33,7 +33,7 @@
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype MTYPE
-#define get_ext(set, map, id) ((map)->extensions + (set)->dsize * (id))
+#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
static void
mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
@@ -67,12 +67,9 @@ mtype_destroy(struct ip_set *set)
del_timer_sync(&map->gc);
ip_set_free(map->members);
- if (set->dsize) {
- if (set->extensions & IPSET_EXT_DESTROY)
- mtype_ext_cleanup(set);
- ip_set_free(map->extensions);
- }
- kfree(map);
+ if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set);
+ ip_set_free(map);
set->data = NULL;
}
@@ -92,16 +89,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
{
const struct mtype *map = set->data;
struct nlattr *nested;
+ size_t memsize = sizeof(*map) + map->memsize;
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested)
goto nla_put_failure;
if (mtype_do_head(skb, map) ||
nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
- nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
- htonl(sizeof(*map) +
- map->memsize +
- set->dsize * map->elements)))
+ nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
goto nla_put_failure;
if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 64a564334418..4783efff0bde 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -41,7 +41,6 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
void *members; /* the set members */
- void *extensions; /* data extensions */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -49,6 +48,8 @@ struct bitmap_ip {
size_t memsize; /* members size */
u8 netmask; /* subnet netmask */
struct timer_list gc; /* garbage collection */
+ unsigned char extensions[0] /* data extensions */
+ __aligned(__alignof__(u64));
};
/* ADT structure for generic function args */
@@ -224,13 +225,6 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
- if (set->dsize) {
- map->extensions = ip_set_alloc(set->dsize * elements);
- if (!map->extensions) {
- kfree(map->members);
- return false;
- }
- }
map->first_ip = first_ip;
map->last_ip = last_ip;
map->elements = elements;
@@ -316,13 +310,13 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
pr_debug("hosts %u, elements %llu\n",
hosts, (unsigned long long)elements);
- map = kzalloc(sizeof(*map), GFP_KERNEL);
+ set->dsize = ip_set_elem_len(set, tb, 0, 0);
+ map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
if (!map)
return -ENOMEM;
map->memsize = bitmap_bytes(0, elements - 1);
set->variant = &bitmap_ip;
- set->dsize = ip_set_elem_len(set, tb, 0);
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 1430535118fb..29dde208381d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -47,24 +47,26 @@ enum {
/* Type structure */
struct bitmap_ipmac {
void *members; /* the set members */
- void *extensions; /* MAC + data extensions */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
struct timer_list gc; /* garbage collector */
+ unsigned char extensions[0] /* MAC + data extensions */
+ __aligned(__alignof__(u64));
};
/* ADT structure for generic function args */
struct bitmap_ipmac_adt_elem {
+ unsigned char ether[ETH_ALEN] __aligned(2);
u16 id;
- unsigned char *ether;
+ u16 add_mac;
};
struct bitmap_ipmac_elem {
unsigned char ether[ETH_ALEN];
unsigned char filled;
-} __attribute__ ((aligned));
+} __aligned(__alignof__(u64));
static inline u32
ip_to_id(const struct bitmap_ipmac *m, u32 ip)
@@ -72,11 +74,11 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip)
return ip - m->first_ip;
}
-static inline struct bitmap_ipmac_elem *
-get_elem(void *extensions, u16 id, size_t dsize)
-{
- return (struct bitmap_ipmac_elem *)(extensions + id * dsize);
-}
+#define get_elem(extensions, id, dsize) \
+ (struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
+
+#define get_const_elem(extensions, id, dsize) \
+ (const struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
/* Common functions */
@@ -88,10 +90,9 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
if (!test_bit(e->id, map->members))
return 0;
- elem = get_elem(map->extensions, e->id, dsize);
- if (elem->filled == MAC_FILLED)
- return !e->ether ||
- ether_addr_equal(e->ether, elem->ether);
+ elem = get_const_elem(map->extensions, e->id, dsize);
+ if (e->add_mac && elem->filled == MAC_FILLED)
+ return ether_addr_equal(e->ether, elem->ether);
/* Trigger kernel to fill out the ethernet address */
return -EAGAIN;
}
@@ -103,7 +104,7 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
if (!test_bit(id, map->members))
return 0;
- elem = get_elem(map->extensions, id, dsize);
+ elem = get_const_elem(map->extensions, id, dsize);
/* Timer not started for the incomplete elements */
return elem->filled == MAC_FILLED;
}
@@ -133,7 +134,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
* and we can reuse it later when MAC is filled out,
* possibly by the kernel
*/
- if (e->ether)
+ if (e->add_mac)
ip_set_timeout_set(timeout, t);
else
*timeout = t;
@@ -150,7 +151,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
elem = get_elem(map->extensions, e->id, dsize);
if (test_bit(e->id, map->members)) {
if (elem->filled == MAC_FILLED) {
- if (e->ether &&
+ if (e->add_mac &&
(flags & IPSET_FLAG_EXIST) &&
!ether_addr_equal(e->ether, elem->ether)) {
/* memcpy isn't atomic */
@@ -159,7 +160,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
ether_addr_copy(elem->ether, e->ether);
}
return IPSET_ADD_FAILED;
- } else if (!e->ether)
+ } else if (!e->add_mac)
/* Already added without ethernet address */
return IPSET_ADD_FAILED;
/* Fill the MAC address and trigger the timer activation */
@@ -168,7 +169,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
ether_addr_copy(elem->ether, e->ether);
elem->filled = MAC_FILLED;
return IPSET_ADD_START_STORED_TIMEOUT;
- } else if (e->ether) {
+ } else if (e->add_mac) {
/* We can store MAC too */
ether_addr_copy(elem->ether, e->ether);
elem->filled = MAC_FILLED;
@@ -191,7 +192,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
u32 id, size_t dsize)
{
const struct bitmap_ipmac_elem *elem =
- get_elem(map->extensions, id, dsize);
+ get_const_elem(map->extensions, id, dsize);
return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
htonl(map->first_ip + id)) ||
@@ -213,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
{
struct bitmap_ipmac *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct bitmap_ipmac_adt_elem e = { .id = 0 };
+ struct bitmap_ipmac_adt_elem e = { .id = 0, .add_mac = 1 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
u32 ip;
@@ -231,7 +232,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
return -EINVAL;
e.id = ip_to_id(map, ip);
- e.ether = eth_hdr(skb)->h_source;
+ memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
@@ -265,11 +266,10 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_BITMAP_RANGE;
e.id = ip_to_id(map, ip);
- if (tb[IPSET_ATTR_ETHER])
- e.ether = nla_data(tb[IPSET_ATTR_ETHER]);
- else
- e.ether = NULL;
-
+ if (tb[IPSET_ATTR_ETHER]) {
+ memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+ e.add_mac = 1;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
return ip_set_eexist(ret, flags) ? 0 : ret;
@@ -300,13 +300,6 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
- if (set->dsize) {
- map->extensions = ip_set_alloc(set->dsize * elements);
- if (!map->extensions) {
- kfree(map->members);
- return false;
- }
- }
map->first_ip = first_ip;
map->last_ip = last_ip;
map->elements = elements;
@@ -361,14 +354,15 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (elements > IPSET_BITMAP_MAX_RANGE + 1)
return -IPSET_ERR_BITMAP_RANGE_SIZE;
- map = kzalloc(sizeof(*map), GFP_KERNEL);
+ set->dsize = ip_set_elem_len(set, tb,
+ sizeof(struct bitmap_ipmac_elem),
+ __alignof__(struct bitmap_ipmac_elem));
+ map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
if (!map)
return -ENOMEM;
map->memsize = bitmap_bytes(0, elements - 1);
set->variant = &bitmap_ipmac;
- set->dsize = ip_set_elem_len(set, tb,
- sizeof(struct bitmap_ipmac_elem));
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 5338ccd5da46..7f0c733358a4 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -35,12 +35,13 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
void *members; /* the set members */
- void *extensions; /* data extensions */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
struct timer_list gc; /* garbage collection */
+ unsigned char extensions[0] /* data extensions */
+ __aligned(__alignof__(u64));
};
/* ADT structure for generic function args */
@@ -209,13 +210,6 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
- if (set->dsize) {
- map->extensions = ip_set_alloc(set->dsize * map->elements);
- if (!map->extensions) {
- kfree(map->members);
- return false;
- }
- }
map->first_port = first_port;
map->last_port = last_port;
set->timeout = IPSET_NO_TIMEOUT;
@@ -232,6 +226,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
{
struct bitmap_port *map;
u16 first_port, last_port;
+ u32 elements;
if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -248,14 +243,15 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
last_port = tmp;
}
- map = kzalloc(sizeof(*map), GFP_KERNEL);
+ elements = last_port - first_port + 1;
+ set->dsize = ip_set_elem_len(set, tb, 0, 0);
+ map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
if (!map)
return -ENOMEM;
- map->elements = last_port - first_port + 1;
+ map->elements = elements;
map->memsize = bitmap_bytes(0, map->elements);
set->variant = &bitmap_port;
- set->dsize = ip_set_elem_len(set, tb, 0);
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 69ab9c2634e1..54f3d7cb23e6 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -364,25 +364,27 @@ add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
}
size_t
-ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
+ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
+ size_t align)
{
enum ip_set_ext_id id;
- size_t offset = len;
u32 cadt_flags = 0;
if (tb[IPSET_ATTR_CADT_FLAGS])
cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
set->flags |= IPSET_CREATE_FLAG_FORCEADD;
+ if (!align)
+ align = 1;
for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
if (!add_extension(id, cadt_flags, tb))
continue;
- offset = ALIGN(offset, ip_set_extensions[id].align);
- set->offset[id] = offset;
+ len = ALIGN(len, ip_set_extensions[id].align);
+ set->offset[id] = len;
set->extensions |= ip_set_extensions[id].type;
- offset += ip_set_extensions[id].len;
+ len += ip_set_extensions[id].len;
}
- return offset;
+ return ALIGN(len, align);
}
EXPORT_SYMBOL_GPL(ip_set_elem_len);
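ip_set_elem_len() now takes the element type's own alignment and rounds the final length up with ALIGN(), so elements packed back-to-back in a single allocation keep both the element and every extension inside it properly aligned. A sketch of the offset computation for power-of-two alignments (the extension table below is illustrative):

#include <stddef.h>
#include <stdio.h>

/* Round x up to a power-of-two alignment a. */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1))

struct ext {
	size_t len, align;
};

/* Place each enabled extension after a base element of size 'len' and
 * return the stride, rounded to the element's own alignment. */
static size_t elem_len(const struct ext *exts, int n, size_t offset[],
		       size_t len, size_t align)
{
	if (!align)
		align = 1;
	for (int i = 0; i < n; i++) {
		len = ALIGN(len, exts[i].align);
		offset[i] = len;
		len += exts[i].len;
	}
	return ALIGN(len, align);
}

int main(void)
{
	struct ext exts[] = { { 4, 4 }, { 8, 8 } };	/* e.g. timeout, counter */
	size_t off[2];
	size_t stride = elem_len(exts, 2, off, 6, 8);

	printf("off0=%zu off1=%zu stride=%zu\n", off[0], off[1], stride);
	return 0;	/* off0=8 off1=16 stride=24 */
}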
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 691b54fcaf2a..e5336ab36d67 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -72,8 +72,9 @@ struct hbucket {
DECLARE_BITMAP(used, AHASH_MAX_TUNED);
u8 size; /* size of the array */
u8 pos; /* position of the first free entry */
- unsigned char value[0]; /* the array of the values */
-} __attribute__ ((aligned));
+ unsigned char value[0] /* the array of the values */
+ __aligned(__alignof__(u64));
+};
/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
@@ -475,7 +476,7 @@ static void
mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
{
struct htable *t;
- struct hbucket *n;
+ struct hbucket *n, *tmp;
struct mtype_elem *data;
u32 i, j, d;
#ifdef IP_SET_HASH_WITH_NETS
@@ -510,9 +511,14 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
}
}
if (d >= AHASH_INIT_SIZE) {
- struct hbucket *tmp = kzalloc(sizeof(*tmp) +
- (n->size - AHASH_INIT_SIZE) * dsize,
- GFP_ATOMIC);
+ if (d >= n->size) {
+ rcu_assign_pointer(hbucket(t, i), NULL);
+ kfree_rcu(n, rcu);
+ continue;
+ }
+ tmp = kzalloc(sizeof(*tmp) +
+ (n->size - AHASH_INIT_SIZE) * dsize,
+ GFP_ATOMIC);
if (!tmp)
/* Still try to delete expired elements */
continue;
@@ -522,7 +528,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
continue;
data = ahash_data(n, j, dsize);
memcpy(tmp->value + d * dsize, data, dsize);
- set_bit(j, tmp->used);
+ set_bit(d, tmp->used);
d++;
}
tmp->pos = d;
@@ -1323,12 +1329,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
#endif
set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
set->dsize = ip_set_elem_len(set, tb,
- sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
+ sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
+ __alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
#ifndef IP_SET_PROTO_UNDEF
} else {
set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
set->dsize = ip_set_elem_len(set, tb,
- sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
+ sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
+ __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
}
#endif
if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 5a30ce6e8c90..bbede95c9f68 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -31,7 +31,7 @@ struct set_elem {
struct rcu_head rcu;
struct list_head list;
ip_set_id_t id;
-};
+} __aligned(__alignof__(u64));
struct set_adt_elem {
ip_set_id_t id;
@@ -618,7 +618,8 @@ list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
size = IP_SET_LIST_MIN_SIZE;
set->variant = &set_variant;
- set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem));
+ set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem),
+ __alignof__(struct set_elem));
if (!init_list_set(net, set, size))
return -ENOMEM;
if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 1e24fff53e4b..f57b4dcdb233 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1176,6 +1176,7 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
+ struct sock *sk;
EnterFunction(11);
@@ -1183,13 +1184,12 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
if (skb->ipvs_property)
return NF_ACCEPT;
+ sk = skb_to_full_sk(skb);
/* Bad... Do not break raw sockets */
- if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+ if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- struct sock *sk = skb->sk;
- struct inet_sock *inet = inet_sk(skb->sk);
- if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+ if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
return NF_ACCEPT;
}
@@ -1681,6 +1681,7 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
struct ip_vs_conn *cp;
int ret, pkts;
int conn_reuse_mode;
+ struct sock *sk;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@@ -1708,12 +1709,11 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
ip_vs_fill_iph_skb(af, skb, false, &iph);
/* Bad... Do not break raw sockets */
- if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+ sk = skb_to_full_sk(skb);
+ if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- struct sock *sk = skb->sk;
- struct inet_sock *inet = inet_sk(skb->sk);
- if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+ if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
return NF_ACCEPT;
}
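Both ip_vs_out() and ip_vs_in() now fetch the socket through skb_to_full_sk(). On the output path skb->sk may point at a request_sock (e.g. for SYN-ACK packets), which has no inet_sock fields; skb_to_full_sk() maps it back to the full listener socket or returns NULL. A hedged sketch of the check as rewritten above:

    #include <linux/skbuff.h>
    #include <net/inet_sock.h>

    static bool skb_nodefrag_raw_socket(const struct sk_buff *skb)
    {
            /* May be NULL, but is guaranteed to be a *full* socket,
             * never a request_sock, so inet_sk() is safe to use.
             */
            struct sock *sk = skb_to_full_sk(skb);

            return sk && sk->sk_family == PF_INET && inet_sk(sk)->nodefrag;
    }
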
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 93cc4737018f..2cb429d34c03 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -89,6 +89,7 @@ nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
}
static void nft_ctx_init(struct nft_ctx *ctx,
+ struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
struct nft_af_info *afi,
@@ -96,7 +97,7 @@ static void nft_ctx_init(struct nft_ctx *ctx,
struct nft_chain *chain,
const struct nlattr * const *nla)
{
- ctx->net = sock_net(skb->sk);
+ ctx->net = net;
ctx->afi = afi;
ctx->table = table;
ctx->chain = chain;
@@ -672,15 +673,14 @@ err:
return ret;
}
-static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newtable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nlattr *name;
struct nft_af_info *afi;
struct nft_table *table;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
u32 flags = 0;
struct nft_ctx ctx;
@@ -706,7 +706,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
return nf_tables_updtable(&ctx);
}
@@ -730,7 +730,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&table->sets);
table->flags = flags;
- nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
goto err3;
@@ -810,18 +810,17 @@ out:
return err;
}
-static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_deltable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
struct nft_table *table;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
- nft_ctx_init(&ctx, skb, nlh, NULL, NULL, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, NULL, NULL, NULL, nla);
if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL)
return nft_flush(&ctx, family);
@@ -1221,8 +1220,8 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
}
}
-static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newchain(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -1232,7 +1231,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
struct nft_chain *chain;
struct nft_base_chain *basechain = NULL;
struct nlattr *ha[NFTA_HOOK_MAX + 1];
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct net_device *dev = NULL;
u8 policy = NF_ACCEPT;
@@ -1313,7 +1311,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
return PTR_ERR(stats);
}
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
sizeof(struct nft_trans_chain));
if (trans == NULL) {
@@ -1461,7 +1459,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
goto err1;
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
if (err < 0)
goto err2;
@@ -1476,15 +1474,14 @@ err1:
return err;
}
-static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delchain(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
@@ -1506,7 +1503,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
if (chain->use > 0)
return -EBUSY;
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
return nft_delchain(&ctx);
}
@@ -2010,13 +2007,12 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
static struct nft_expr_info *info;
-static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newrule(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
- struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_chain *chain;
struct nft_rule *rule, *old_rule = NULL;
@@ -2075,7 +2071,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
return PTR_ERR(old_rule);
}
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
n = 0;
size = 0;
@@ -2176,13 +2172,12 @@ err1:
return err;
}
-static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delrule(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
- struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_chain *chain = NULL;
struct nft_rule *rule;
@@ -2205,7 +2200,7 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
return PTR_ERR(chain);
}
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
if (chain) {
if (nla[NFTA_RULE_HANDLE]) {
@@ -2344,12 +2339,11 @@ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
[NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
};
-static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
+static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
- struct net *net = sock_net(skb->sk);
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi = NULL;
struct nft_table *table = NULL;
@@ -2371,7 +2365,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
return -ENOENT;
}
- nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
return 0;
}
@@ -2623,6 +2617,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ struct net *net = sock_net(skb->sk);
const struct nft_set *set;
struct nft_ctx ctx;
struct sk_buff *skb2;
@@ -2630,7 +2625,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
int err;
/* Verify existence before starting dump */
- err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla);
if (err < 0)
return err;
@@ -2693,14 +2688,13 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
return 0;
}
-static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newset(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nft_set_ops *ops;
struct nft_af_info *afi;
- struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
@@ -2798,7 +2792,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (IS_ERR(table))
return PTR_ERR(table);
- nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]);
if (IS_ERR(set)) {
@@ -2882,8 +2876,8 @@ static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set
nft_set_destroy(set);
}
-static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delset(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -2896,7 +2890,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_TABLE] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla);
if (err < 0)
return err;
@@ -3024,7 +3018,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
[NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
-static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
+static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -3033,7 +3027,6 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
struct nft_table *table;
- struct net *net = sock_net(skb->sk);
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
if (IS_ERR(afi))
@@ -3045,7 +3038,7 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
if (!trans && (table->flags & NFT_TABLE_INACTIVE))
return -ENOENT;
- nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
return 0;
}
@@ -3135,6 +3128,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = sock_net(skb->sk);
const struct nft_set *set;
struct nft_set_dump_args args;
struct nft_ctx ctx;
@@ -3150,8 +3144,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
if (err < 0)
return err;
- err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla,
- false);
+ err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh,
+ (void *)nla, false);
if (err < 0)
return err;
@@ -3212,11 +3206,12 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ struct net *net = sock_net(skb->sk);
const struct nft_set *set;
struct nft_ctx ctx;
int err;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, false);
if (err < 0)
return err;
@@ -3528,11 +3523,10 @@ err1:
return err;
}
-static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
- struct net *net = sock_net(skb->sk);
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
@@ -3541,7 +3535,7 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, true);
if (err < 0)
return err;
@@ -3623,8 +3617,8 @@ err1:
return err;
}
-static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nlattr *attr;
@@ -3635,7 +3629,7 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, false);
if (err < 0)
return err;
@@ -4030,7 +4024,8 @@ static int nf_tables_abort(struct sk_buff *skb)
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
- list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list,
+ list) {
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
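nf_tables_abort() now undoes queued transactions with list_for_each_entry_safe_reverse(), so objects roll back in the opposite order of their creation (set elements before sets, rules before chains, chains before tables), matching their dependencies. The shape of the idiom, with illustrative types:

    #include <linux/list.h>
    #include <linux/slab.h>

    struct trans {
            struct list_head list;
            int msg_type;
    };

    static void abort_all(struct list_head *commit_list)
    {
            struct trans *t, *next;

            /* _safe_ lets us delete the entry we stand on; _reverse
             * walks newest-first so dependents go before dependees.
             */
            list_for_each_entry_safe_reverse(t, next, commit_list, list) {
                    /* ... type-specific rollback keyed on msg_type ... */
                    list_del(&t->list);
                    kfree(t);
            }
    }
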
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 7b9c053ba750..edb3502f2016 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -94,7 +94,7 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb,
{
struct nft_pktinfo pkt;
- switch (eth_hdr(skb)->h_proto) {
+ switch (skb->protocol) {
case htons(ETH_P_IP):
nft_netdev_set_pktinfo_ipv4(&pkt, skb, state);
break;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 46453ab318db..77afe913d03d 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -295,8 +295,6 @@ replay:
if (!skb)
return netlink_ack(oskb, nlh, -ENOMEM);
- skb->sk = oskb->sk;
-
nfnl_lock(subsys_id);
ss = rcu_dereference_protected(table[subsys_id].subsys,
lockdep_is_held(&table[subsys_id].mutex));
@@ -381,7 +379,7 @@ replay:
goto ack;
if (nc->call_batch) {
- err = nc->call_batch(net->nfnl, skb, nlh,
+ err = nc->call_batch(net, net->nfnl, skb, nlh,
(const struct nlattr **)cda);
}
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 06eb48fceb42..740cce4685ac 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -825,7 +825,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
struct net *net = sock_net(ctnl);
struct nfnl_log_net *log = nfnl_log_pernet(net);
int ret = 0;
- u16 flags;
+ u16 flags = 0;
if (nfula[NFULA_CFG_CMD]) {
u_int8_t pf = nfmsg->nfgen_family;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 7d81d280cb4f..861c6615253b 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -365,8 +365,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
break;
}
+ nfnl_ct = rcu_dereference(nfnl_ct_hook);
+
if (queue->flags & NFQA_CFG_F_CONNTRACK) {
- nfnl_ct = rcu_dereference(nfnl_ct_hook);
if (nfnl_ct != NULL) {
ct = nfnl_ct->get_ct(entskb, &ctinfo);
if (ct != NULL)
@@ -1064,9 +1065,10 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
if (entry == NULL)
return -ENOENT;
+ /* rcu lock already held from nfnl->call_rcu. */
+ nfnl_ct = rcu_dereference(nfnl_ct_hook);
+
if (nfqa[NFQA_CT]) {
- /* rcu lock already held from nfnl->call_rcu. */
- nfnl_ct = rcu_dereference(nfnl_ct_hook);
if (nfnl_ct != NULL)
ct = nfqnl_ct_parse(nfnl_ct, nlh, nfqa, entry, &ctinfo);
}
@@ -1417,6 +1419,7 @@ static int __init nfnetlink_queue_init(void)
cleanup_netlink_notifier:
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+ unregister_pernet_subsys(&nfnl_queue_net_ops);
out:
return status;
}
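The nfnetlink_queue_init() change adds the missing unregister_pernet_subsys() to the notifier-registration failure path: each error label must release everything acquired before the jump, in reverse order of acquisition. A generic sketch of the convention; the example_* names are hypothetical:

    #include <linux/init.h>
    #include <linux/netlink.h>
    #include <linux/notifier.h>
    #include <net/net_namespace.h>

    static struct pernet_operations example_net_ops;    /* hypothetical */
    static struct notifier_block example_notifier;      /* hypothetical */

    static int __init example_init(void)
    {
            int err;

            err = register_pernet_subsys(&example_net_ops);
            if (err)
                    goto out;
            err = netlink_register_notifier(&example_notifier);
            if (err)
                    goto cleanup_pernet;    /* undo step 1 only */
            return 0;

    cleanup_pernet:
            unregister_pernet_subsys(&example_net_ops);
    out:
            return err;
    }
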
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 1067fb4c1ffa..c7808fc19719 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -47,27 +47,34 @@ static void nft_counter_eval(const struct nft_expr *expr,
local_bh_enable();
}
-static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter,
+ struct nft_counter *total)
{
- struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
- struct nft_counter_percpu *cpu_stats;
- struct nft_counter total;
+ const struct nft_counter_percpu *cpu_stats;
u64 bytes, packets;
unsigned int seq;
int cpu;
- memset(&total, 0, sizeof(total));
+ memset(total, 0, sizeof(*total));
for_each_possible_cpu(cpu) {
- cpu_stats = per_cpu_ptr(priv->counter, cpu);
+ cpu_stats = per_cpu_ptr(counter, cpu);
do {
seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
bytes = cpu_stats->counter.bytes;
packets = cpu_stats->counter.packets;
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
- total.packets += packets;
- total.bytes += bytes;
+ total->packets += packets;
+ total->bytes += bytes;
}
+}
+
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct nft_counter total;
+
+ nft_counter_fetch(priv->counter, &total);
if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) ||
nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets)))
@@ -118,6 +125,31 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
free_percpu(priv->counter);
}
+static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
+ struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
+ struct nft_counter_percpu __percpu *cpu_stats;
+ struct nft_counter_percpu *this_cpu;
+ struct nft_counter total;
+
+ nft_counter_fetch(priv->counter, &total);
+
+ cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
+ GFP_ATOMIC);
+ if (cpu_stats == NULL)
+ return -ENOMEM;
+
+ preempt_disable();
+ this_cpu = this_cpu_ptr(cpu_stats);
+ this_cpu->counter.packets = total.packets;
+ this_cpu->counter.bytes = total.bytes;
+ preempt_enable();
+
+ priv_clone->counter = cpu_stats;
+ return 0;
+}
+
static struct nft_expr_type nft_counter_type;
static const struct nft_expr_ops nft_counter_ops = {
.type = &nft_counter_type,
@@ -126,6 +158,7 @@ static const struct nft_expr_ops nft_counter_ops = {
.init = nft_counter_init,
.destroy = nft_counter_destroy,
.dump = nft_counter_dump,
+ .clone = nft_counter_clone,
};
static struct nft_expr_type nft_counter_type __read_mostly = {
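The refactor above splits the per-cpu counter summation into nft_counter_fetch() so the new .clone hook can reuse it: totals are folded under the u64_stats seqcount retry loop (needed against torn 64-bit reads on 32-bit machines) and then seeded into one CPU's slot of the clone's fresh per-cpu allocation. A minimal sketch of the read side, with a stand-in stats struct:

    #include <linux/percpu.h>
    #include <linux/u64_stats_sync.h>

    struct pcpu_ctr {
            u64 bytes, packets;
            struct u64_stats_sync syncp;
    };

    static void fetch_total(struct pcpu_ctr __percpu *ctr,
                            u64 *bytes, u64 *packets)
    {
            unsigned int start;
            int cpu;

            *bytes = 0;
            *packets = 0;
            for_each_possible_cpu(cpu) {
                    struct pcpu_ctr *c = per_cpu_ptr(ctr, cpu);
                    u64 b, p;

                    do {    /* retry if a writer updated c meanwhile */
                            start = u64_stats_fetch_begin_irq(&c->syncp);
                            b = c->bytes;
                            p = c->packets;
                    } while (u64_stats_fetch_retry_irq(&c->syncp, start));
                    *bytes += b;
                    *packets += p;
            }
    }
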
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 8cbca3432f90..939921532764 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -366,6 +366,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
switch (priv->key) {
+ case NFT_CT_L3PROTOCOL:
case NFT_CT_PROTOCOL:
case NFT_CT_SRC:
case NFT_CT_DST:
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 513a8ef60a59..9dec3bd1b63c 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -50,8 +50,9 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
}
ext = nft_set_elem_ext(set, elem);
- if (priv->expr != NULL)
- nft_expr_clone(nft_set_ext_expr(ext), priv->expr);
+ if (priv->expr != NULL &&
+ nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
+ return NULL;
return elem;
}
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index b7de0da46acd..ecf0a0196f18 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -572,7 +572,7 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED)
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
pr_debug("mask 0x%x\n", mask);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index c2cc11168fd5..e004067ec24a 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -53,6 +53,8 @@ struct ovs_conntrack_info {
struct md_labels labels;
};
+static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info);
+
static u16 key_to_nfproto(const struct sw_flow_key *key)
{
switch (ntohs(key->eth.type)) {
@@ -141,6 +143,7 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
* previously sent the packet to conntrack via the ct action.
*/
static void ovs_ct_update_key(const struct sk_buff *skb,
+ const struct ovs_conntrack_info *info,
struct sw_flow_key *key, bool post_ct)
{
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
@@ -158,13 +161,15 @@ static void ovs_ct_update_key(const struct sk_buff *skb,
zone = nf_ct_zone(ct);
} else if (post_ct) {
state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
+ if (info)
+ zone = &info->zone;
}
__ovs_ct_update_key(key, state, zone, ct);
}
void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
{
- ovs_ct_update_key(skb, key, false);
+ ovs_ct_update_key(skb, NULL, key, false);
}
int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
@@ -418,7 +423,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
}
}
- ovs_ct_update_key(skb, key, true);
+ ovs_ct_update_key(skb, info, key, true);
return 0;
}
@@ -693,6 +698,10 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
OVS_NLERR(log, "Failed to allocate conntrack template");
return -ENOMEM;
}
+
+ __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
+ nf_conntrack_get(&ct_info.ct->ct_general);
+
if (helper) {
err = ovs_ct_add_helper(&ct_info, helper, key, log);
if (err)
@@ -704,11 +713,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (err)
goto err_free_ct;
- __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
- nf_conntrack_get(&ct_info.ct->ct_general);
return 0;
err_free_ct:
- nf_conntrack_free(ct_info.ct);
+ __ovs_ct_free_action(&ct_info);
return err;
}
@@ -750,6 +757,11 @@ void ovs_ct_free_action(const struct nlattr *a)
{
struct ovs_conntrack_info *ct_info = nla_data(a);
+ __ovs_ct_free_action(ct_info);
+}
+
+static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
+{
if (ct_info->helper)
module_put(ct_info->helper->me);
if (ct_info->ct)
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index a7a80a6b77b0..653d073bae45 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -58,7 +58,7 @@ void ovs_dp_notify_wq(struct work_struct *work)
struct hlist_node *n;
hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
- if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
+ if (vport->ops->type == OVS_VPORT_TYPE_INTERNAL)
continue;
if (!(vport->dev->priv_flags & IFF_OVS_DATAPATH))
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 907d6fd28ede..d1bd4a45ca2d 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2434,7 +2434,10 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
if (!start)
return -EMSGSIZE;
- err = ovs_nla_put_tunnel_info(skb, tun_info);
+ err = ip_tun_to_nlattr(skb, &tun_info->key,
+ ip_tunnel_info_opts(tun_info),
+ tun_info->options_len,
+ ip_tunnel_info_af(tun_info));
if (err)
return err;
nla_nest_end(skb, start);
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index efb736bb6855..e41cd12d9b2d 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -117,7 +117,6 @@ static struct vport_ops ovs_geneve_vport_ops = {
.destroy = ovs_netdev_tunnel_destroy,
.get_options = geneve_get_options,
.send = dev_queue_xmit,
- .owner = THIS_MODULE,
};
static int __init ovs_geneve_tnl_init(void)
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index c3257d78d3d2..7f8897f33a67 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -89,7 +89,6 @@ static struct vport_ops ovs_gre_vport_ops = {
.create = gre_create,
.send = dev_queue_xmit,
.destroy = ovs_netdev_tunnel_destroy,
- .owner = THIS_MODULE,
};
static int __init ovs_gre_tnl_init(void)
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index b327368a3848..6b0190b987ec 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -180,9 +180,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport)
if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
ovs_netdev_detach_dev(vport);
- /* Early release so we can unregister the device */
+ /* We can be invoked by both explicit vport deletion and
+ * underlying netdev deregistration; delete the link only
+ * if it's not already shutting down.
+ */
+ if (vport->dev->reg_state == NETREG_REGISTERED)
+ rtnl_delete_link(vport->dev);
dev_put(vport->dev);
- rtnl_delete_link(vport->dev);
vport->dev = NULL;
rtnl_unlock();
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 0ac0fd004d7e..31cbc8c5c7db 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -71,7 +71,7 @@ static struct hlist_head *hash_bucket(const struct net *net, const char *name)
return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
}
-int ovs_vport_ops_register(struct vport_ops *ops)
+int __ovs_vport_ops_register(struct vport_ops *ops)
{
int err = -EEXIST;
struct vport_ops *o;
@@ -87,7 +87,7 @@ errout:
ovs_unlock();
return err;
}
-EXPORT_SYMBOL_GPL(ovs_vport_ops_register);
+EXPORT_SYMBOL_GPL(__ovs_vport_ops_register);
void ovs_vport_ops_unregister(struct vport_ops *ops)
{
@@ -256,8 +256,8 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
*
* @vport: vport to delete.
*
- * Detaches @vport from its datapath and destroys it. It is possible to fail
- * for reasons such as lack of memory. ovs_mutex must be held.
+ * Detaches @vport from its datapath and destroys it. ovs_mutex must
+ * be held.
*/
void ovs_vport_del(struct vport *vport)
{
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index bdfd82a7c064..8ea3a96980ac 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -196,7 +196,13 @@ static inline const char *ovs_vport_name(struct vport *vport)
return vport->dev->name;
}
-int ovs_vport_ops_register(struct vport_ops *ops);
+int __ovs_vport_ops_register(struct vport_ops *ops);
+#define ovs_vport_ops_register(ops) \
+ ({ \
+ (ops)->owner = THIS_MODULE; \
+ __ovs_vport_ops_register(ops); \
+ })
+
void ovs_vport_ops_unregister(struct vport_ops *ops);
static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
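Turning ovs_vport_ops_register() into a macro lets THIS_MODULE expand in the caller's translation unit, so each tunnel module stamps itself as owner before the real registration; that is why the explicit .owner initializers disappear from the geneve and gre vport_ops tables above. The trick in isolation, with illustrative names:

    #include <linux/module.h>

    struct example_ops {
            struct module *owner;
            /* ... */
    };

    int __example_register(struct example_ops *ops);

    /* THIS_MODULE names the module compiling the line it appears on,
     * so owner must be captured in the caller via a macro; inside the
     * core it would always resolve to the core module itself.
     */
    #define example_register(ops)                 \
            ({                                    \
                    (ops)->owner = THIS_MODULE;   \
                    __example_register(ops);      \
            })
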
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index af399cac5205..992396aa635c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1741,6 +1741,20 @@ static void fanout_release(struct sock *sk)
kfree_rcu(po->rollover, rcu);
}
+static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
+ struct sk_buff *skb)
+{
+ /* Earlier code assumed this would be a VLAN pkt, double-check
+ * this now that we have the actual packet in hand. We can only
+ * do this check on Ethernet devices.
+ */
+ if (unlikely(dev->type != ARPHRD_ETHER))
+ return false;
+
+ skb_reset_mac_header(skb);
+ return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
+}
+
static const struct proto_ops packet_ops;
static const struct proto_ops packet_ops_spkt;
@@ -1902,18 +1916,10 @@ retry:
goto retry;
}
- if (len > (dev->mtu + dev->hard_header_len + extra_len)) {
- /* Earlier code assumed this would be a VLAN pkt,
- * double-check this now that we have the actual
- * packet in hand.
- */
- struct ethhdr *ehdr;
- skb_reset_mac_header(skb);
- ehdr = eth_hdr(skb);
- if (ehdr->h_proto != htons(ETH_P_8021Q)) {
- err = -EMSGSIZE;
- goto out_unlock;
- }
+ if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
+ !packet_extra_vlan_len_allowed(dev, skb)) {
+ err = -EMSGSIZE;
+ goto out_unlock;
}
skb->protocol = proto;
@@ -2323,8 +2329,8 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
static bool ll_header_truncated(const struct net_device *dev, int len)
{
/* net device doesn't like empty head */
- if (unlikely(len <= dev->hard_header_len)) {
- net_warn_ratelimited("%s: packet size is too short (%d <= %d)\n",
+ if (unlikely(len < dev->hard_header_len)) {
+ net_warn_ratelimited("%s: packet size is too short (%d < %d)\n",
current->comm, len, dev->hard_header_len);
return true;
}
@@ -2332,6 +2338,15 @@ static bool ll_header_truncated(const struct net_device *dev, int len)
return false;
}
+static void tpacket_set_protocol(const struct net_device *dev,
+ struct sk_buff *skb)
+{
+ if (dev->type == ARPHRD_ETHER) {
+ skb_reset_mac_header(skb);
+ skb->protocol = eth_hdr(skb)->h_proto;
+ }
+}
+
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
void *frame, struct net_device *dev, int size_max,
__be16 proto, unsigned char *addr, int hlen)
@@ -2368,8 +2383,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
- if (!packet_use_direct_xmit(po))
- skb_probe_transport_header(skb, 0);
if (unlikely(po->tp_tx_has_off)) {
int off_min, off_max, off;
off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
@@ -2415,6 +2428,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
dev->hard_header_len);
if (unlikely(err))
return err;
+ if (!skb->protocol)
+ tpacket_set_protocol(dev, skb);
data += dev->hard_header_len;
to_write -= dev->hard_header_len;
@@ -2449,6 +2464,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
len = ((to_write > len_max) ? len_max : to_write);
}
+ skb_probe_transport_header(skb, 0);
+
return tp_len;
}
@@ -2493,12 +2510,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if (unlikely(!(dev->flags & IFF_UP)))
goto out_put;
- reserve = dev->hard_header_len + VLAN_HLEN;
+ if (po->sk.sk_socket->type == SOCK_RAW)
+ reserve = dev->hard_header_len;
size_max = po->tx_ring.frame_size
- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
- if (size_max > dev->mtu + reserve)
- size_max = dev->mtu + reserve;
+ if (size_max > dev->mtu + reserve + VLAN_HLEN)
+ size_max = dev->mtu + reserve + VLAN_HLEN;
do {
ph = packet_current_frame(po, &po->tx_ring,
@@ -2525,18 +2543,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
addr, hlen);
if (likely(tp_len >= 0) &&
- tp_len > dev->mtu + dev->hard_header_len) {
- struct ethhdr *ehdr;
- /* Earlier code assumed this would be a VLAN pkt,
- * double-check this now that we have the actual
- * packet in hand.
- */
+ tp_len > dev->mtu + reserve &&
+ !packet_extra_vlan_len_allowed(dev, skb))
+ tp_len = -EMSGSIZE;
- skb_reset_mac_header(skb);
- ehdr = eth_hdr(skb);
- if (ehdr->h_proto != htons(ETH_P_8021Q))
- tp_len = -EMSGSIZE;
- }
if (unlikely(tp_len < 0)) {
if (po->tp_loss) {
__packet_set_status(po, ph,
@@ -2765,18 +2775,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
- if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
- /* Earlier code assumed this would be a VLAN pkt,
- * double-check this now that we have the actual
- * packet in hand.
- */
- struct ethhdr *ehdr;
- skb_reset_mac_header(skb);
- ehdr = eth_hdr(skb);
- if (ehdr->h_proto != htons(ETH_P_8021Q)) {
- err = -EMSGSIZE;
- goto out_free;
- }
+ if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
+ !packet_extra_vlan_len_allowed(dev, skb)) {
+ err = -EMSGSIZE;
+ goto out_free;
}
skb->protocol = proto;
@@ -2807,8 +2809,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
len += vnet_hdr_len;
}
- if (!packet_use_direct_xmit(po))
- skb_probe_transport_header(skb, reserve);
+ skb_probe_transport_header(skb, reserve);
+
if (unlikely(extra_len == 4))
skb->no_fcs = 1;
@@ -4107,7 +4109,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
err = -EINVAL;
if (unlikely((int)req->tp_block_size <= 0))
goto out;
- if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
+ if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
goto out;
if (po->tp_version >= TPACKET_V3 &&
(int)(req->tp_block_size -
@@ -4119,8 +4121,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
goto out;
- rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
- if (unlikely(rb->frames_per_block <= 0))
+ rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
+ if (unlikely(rb->frames_per_block == 0))
goto out;
if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
req->tp_frame_nr))
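The ring-geometry checks in packet_set_ring() were tightened: the open-coded mask test becomes PAGE_ALIGNED(), and since frames_per_block is unsigned the old "<= 0" comparison collapses to "== 0". A hedged sketch of the validation, assuming frame sizes were already range-checked elsewhere:

    #include <linux/mm.h>       /* PAGE_ALIGNED() */

    static bool ring_geometry_ok(unsigned int block_size,
                                 unsigned int frame_size,
                                 unsigned int block_nr,
                                 unsigned int frame_nr)
    {
            if ((int)block_size <= 0 || !PAGE_ALIGNED(block_size))
                    return false;
            if (!frame_size)
                    return false;
            /* unsigned division: zero is the only "no frame fits" case */
            if (block_size / frame_size == 0)
                    return false;
            return (block_size / frame_size) * block_nr == frame_nr;
    }
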
diff --git a/net/rds/connection.c b/net/rds/connection.c
index d4564036a339..e3b118cae81d 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -186,12 +186,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
}
}
- if (trans == NULL) {
- kmem_cache_free(rds_conn_slab, conn);
- conn = ERR_PTR(-ENODEV);
- goto out;
- }
-
conn->c_trans = trans;
ret = trans->conn_alloc(conn, gfp);
diff --git a/net/rds/send.c b/net/rds/send.c
index 827155c2ead1..c9cdb358ea88 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1013,11 +1013,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
release_sock(sk);
}
- /* racing with another thread binding seems ok here */
+ lock_sock(sk);
if (daddr == 0 || rs->rs_bound_addr == 0) {
+ release_sock(sk);
ret = -ENOTCONN; /* XXX not a great errno */
goto out;
}
+ release_sock(sk);
if (payload_len > rds_sk_sndbuf(rs)) {
ret = -EMSGSIZE;
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index b41e9ea2ffff..f53bf3b6558b 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -49,7 +49,6 @@
struct rfkill {
spinlock_t lock;
- const char *name;
enum rfkill_type type;
unsigned long state;
@@ -73,6 +72,7 @@ struct rfkill {
struct delayed_work poll_work;
struct work_struct uevent_work;
struct work_struct sync_work;
+ char name[];
};
#define to_rfkill(d) container_of(d, struct rfkill, dev)
@@ -876,14 +876,14 @@ struct rfkill * __must_check rfkill_alloc(const char *name,
if (WARN_ON(type == RFKILL_TYPE_ALL || type >= NUM_RFKILL_TYPES))
return NULL;
- rfkill = kzalloc(sizeof(*rfkill), GFP_KERNEL);
+ rfkill = kzalloc(sizeof(*rfkill) + strlen(name) + 1, GFP_KERNEL);
if (!rfkill)
return NULL;
spin_lock_init(&rfkill->lock);
INIT_LIST_HEAD(&rfkill->node);
rfkill->type = type;
- rfkill->name = name;
+ strcpy(rfkill->name, name);
rfkill->ops = ops;
rfkill->data = ops_data;
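rfkill_alloc() previously stored the caller's name pointer, which could outlive the caller's storage; the structure now ends in a flexible array member, and a single kzalloc() covers both the struct and its private copy of the string. The general pattern:

    #include <linux/slab.h>
    #include <linux/string.h>

    struct widget {
            int type;
            char name[];        /* flexible array member: must be last */
    };

    static struct widget *widget_alloc(const char *name, int type)
    {
            /* one allocation holds the struct and the name copy */
            struct widget *w = kzalloc(sizeof(*w) + strlen(name) + 1,
                                       GFP_KERNEL);

            if (!w)
                    return NULL;
            w->type = type;
            strcpy(w->name, name);  /* fits: sized from this string */
            return w;
    }
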
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index e0547f521f20..adc555e0323d 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -723,8 +723,10 @@ process_further:
if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY ||
call->state == RXRPC_CALL_SERVER_AWAIT_ACK) &&
- hard > tx)
+ hard > tx) {
+ call->acks_hard = tx;
goto all_acked;
+ }
smp_rmb();
rxrpc_rotate_tx_window(call, hard - 1);
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index a40d3afe93b7..14c4e12c47b0 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -531,7 +531,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
/* this should be in poll */
- clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
return -EPIPE;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f43c8f33f09e..b5c2cf2aa6d4 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -253,7 +253,8 @@ int qdisc_set_default(const char *name)
}
/* We know handle. Find qdisc among all qdisc's attached to device
- (root qdisc, all its children, children of children etc.)
+ * (root qdisc, all its children, children of children etc.)
+ * Note: caller either uses rtnl or rcu_read_lock()
*/
static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
@@ -264,7 +265,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
root->handle == handle)
return root;
- list_for_each_entry(q, &root->list, list) {
+ list_for_each_entry_rcu(q, &root->list, list) {
if (q->handle == handle)
return q;
}
@@ -277,15 +278,18 @@ void qdisc_list_add(struct Qdisc *q)
struct Qdisc *root = qdisc_dev(q)->qdisc;
WARN_ON_ONCE(root == &noop_qdisc);
- list_add_tail(&q->list, &root->list);
+ ASSERT_RTNL();
+ list_add_tail_rcu(&q->list, &root->list);
}
}
EXPORT_SYMBOL(qdisc_list_add);
void qdisc_list_del(struct Qdisc *q)
{
- if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
- list_del(&q->list);
+ if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
+ ASSERT_RTNL();
+ list_del_rcu(&q->list);
+ }
}
EXPORT_SYMBOL(qdisc_list_del);
@@ -750,14 +754,18 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
if (n == 0)
return;
drops = max_t(int, n, 0);
+ rcu_read_lock();
while ((parentid = sch->parent)) {
if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
- return;
+ break;
+ if (sch->flags & TCQ_F_NOPARENT)
+ break;
+ /* TODO: perform the search on a per txq basis */
sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
- WARN_ON(parentid != TC_H_ROOT);
- return;
+ WARN_ON_ONCE(parentid != TC_H_ROOT);
+ break;
}
cops = sch->ops->cl_ops;
if (cops->qlen_notify) {
@@ -768,6 +776,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
sch->q.qlen -= n;
__qdisc_qstats_drop(sch, drops);
}
+ rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cb5d4ad32946..16bc83b2842a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -658,8 +658,10 @@ static void qdisc_rcu_free(struct rcu_head *head)
{
struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
- if (qdisc_is_percpu_stats(qdisc))
+ if (qdisc_is_percpu_stats(qdisc)) {
free_percpu(qdisc->cpu_bstats);
+ free_percpu(qdisc->cpu_qstats);
+ }
kfree((char *) qdisc - qdisc->padded);
}
@@ -737,7 +739,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
return;
}
if (!netif_is_multiqueue(dev))
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
dev_queue->qdisc_sleeping = qdisc;
}
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index f3cbaecd283a..3e82f047caaf 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -63,7 +63,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
if (qdisc == NULL)
goto err;
priv->qdiscs[ntx] = qdisc;
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
sch->flags |= TCQ_F_MQROOT;
@@ -156,7 +156,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
*old = dev_graft_qdisc(dev_queue, new);
if (new)
- new->flags |= TCQ_F_ONETXQUEUE;
+ new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
if (dev->flags & IFF_UP)
dev_activate(dev);
return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 3811a745452c..ad70ecf57ce7 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -132,7 +132,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
goto err;
}
priv->qdiscs[i] = qdisc;
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
/* If the mqprio options indicate that hardware should own
@@ -209,7 +209,7 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
*old = dev_graft_qdisc(dev_queue, new);
if (new)
- new->flags |= TCQ_F_ONETXQUEUE;
+ new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
if (dev->flags & IFF_UP)
dev_activate(dev);
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 4f15b7d730e1..1543e39f47c3 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -809,8 +809,8 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
if (!has_sha1)
return -EINVAL;
- memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0],
- hmacs->shmac_num_idents * sizeof(__u16));
+ for (i = 0; i < hmacs->shmac_num_idents; i++)
+ ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) +
hmacs->shmac_num_idents * sizeof(__u16));
return 0;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e917d27328ea..ec529121f38a 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -209,6 +209,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
struct sock *sk = skb->sk;
struct ipv6_pinfo *np = inet6_sk(sk);
struct flowi6 *fl6 = &transport->fl.u.ip6;
+ int res;
pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb,
skb->len, &fl6->saddr, &fl6->daddr);
@@ -220,7 +221,10 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
- return ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
+ rcu_read_lock();
+ res = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass);
+ rcu_read_unlock();
+ return res;
}
/* Returns the dst cache entry for the given source and destination ip
@@ -262,7 +266,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
pr_debug("src=%pI6 - ", &fl6->saddr);
}
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
+
dst = ip6_dst_lookup_flow(sk, fl6, final_p);
if (!asoc || saddr)
goto out;
@@ -316,14 +323,13 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
}
}
}
- rcu_read_unlock();
-
if (baddr) {
fl6->saddr = baddr->v6.sin6_addr;
fl6->fl6_sport = baddr->v6.sin6_port;
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
dst = ip6_dst_lookup_flow(sk, fl6, final_p);
}
+ rcu_read_unlock();
out:
if (!IS_ERR_OR_NULL(dst)) {
@@ -635,6 +641,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
struct sock *newsk;
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct sctp6_sock *newsctp6sk;
+ struct ipv6_txoptions *opt;
newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0);
if (!newsk)
@@ -654,6 +661,13 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt)
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ rcu_read_unlock();
+
/* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname()
* and getpeername().
*/
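These sctp/ipv6.c hunks belong to the series that put IPv6 tx options under RCU: np->opt is now an __rcu pointer, so each use must sit inside a read-side critical section with rcu_dereference(), and the accept path duplicates the options for the child socket instead of sharing the parent's. The basic read-side shape, on a stand-in structure:

    #include <linux/rcupdate.h>

    struct txopts { int hopopt_len; };
    struct pinfo  { struct txopts __rcu *opt; };

    static int read_hopopt_len(struct pinfo *np)
    {
            struct txopts *o;
            int len = 0;

            rcu_read_lock();              /* object valid until unlock */
            o = rcu_dereference(np->opt); /* pairs with
                                           * rcu_assign_pointer() */
            if (o)
                    len = o->hopopt_len;
            rcu_read_unlock();            /* o unusable past this point */
            return len;
    }
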
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 7e8f0a117106..c0380cfb16ae 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -324,6 +324,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
"illegal chunk");
+ sctp_chunk_hold(chunk);
sctp_outq_tail_data(q, chunk);
if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
@@ -1251,6 +1252,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
*/
sack_a_rwnd = ntohl(sack->a_rwnd);
+ asoc->peer.zero_window_announced = !sack_a_rwnd;
outstanding = q->outstanding_bytes;
if (outstanding < sack_a_rwnd)
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 763e06a55155..5d6a03fad378 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1652,7 +1652,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
/* Set an expiration time for the cookie. */
cookie->c.expiration = ktime_add(asoc->cookie_life,
- ktime_get());
+ ktime_get_real());
/* Copy the peer's init packet. */
memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr,
@@ -1780,7 +1780,7 @@ no_hmac:
if (sock_flag(ep->base.sk, SOCK_TIMESTAMP))
kt = skb_get_ktime(skb);
else
- kt = ktime_get();
+ kt = ktime_get_real();
if (!asoc && ktime_before(bear_cookie->expiration, kt)) {
/*
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 6f46aa16cb76..22c2bf367d7e 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -4829,7 +4829,8 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
retval = SCTP_DISPOSITION_CONSUME;
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+ if (abort)
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
/* Even if we can't send the ABORT due to low memory delete the
* TCB. This is a departure from our typical NOMEM handling.
@@ -4966,7 +4967,8 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
retval = SCTP_DISPOSITION_CONSUME;
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+ if (abort)
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
@@ -5412,7 +5414,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
SCTP_INC_STATS(net, SCTP_MIB_T3_RTX_EXPIREDS);
if (asoc->overall_error_count >= asoc->max_retrans) {
- if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) {
+ if (asoc->peer.zero_window_announced &&
+ asoc->state == SCTP_STATE_SHUTDOWN_PENDING) {
/*
* We are here likely because the receiver had its rwnd
* closed for a while and we have not been able to
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 897c01c029ca..ef1d90fdc773 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -972,7 +972,7 @@ static int sctp_setsockopt_bindx(struct sock *sk,
return -EFAULT;
/* Alloc space for the address array in kernel memory. */
- kaddrs = kmalloc(addrs_size, GFP_KERNEL);
+ kaddrs = kmalloc(addrs_size, GFP_USER | __GFP_NOWARN);
if (unlikely(!kaddrs))
return -ENOMEM;
@@ -1301,8 +1301,9 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
int addrs_size,
sctp_assoc_t *assoc_id)
{
- int err = 0;
struct sockaddr *kaddrs;
+ gfp_t gfp = GFP_KERNEL;
+ int err = 0;
pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n",
__func__, sk, addrs, addrs_size);
@@ -1315,7 +1316,9 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
return -EFAULT;
/* Alloc space for the address array in kernel memory. */
- kaddrs = kmalloc(addrs_size, GFP_KERNEL);
+ if (sk->sk_socket->file)
+ gfp = GFP_USER | __GFP_NOWARN;
+ kaddrs = kmalloc(addrs_size, gfp);
if (unlikely(!kaddrs))
return -ENOMEM;
@@ -1513,8 +1516,7 @@ static void sctp_close(struct sock *sk, long timeout)
struct sctp_chunk *chunk;
chunk = sctp_make_abort_user(asoc, NULL, 0);
- if (chunk)
- sctp_primitive_ABORT(net, asoc, chunk);
+ sctp_primitive_ABORT(net, asoc, chunk);
} else
sctp_primitive_SHUTDOWN(net, asoc, NULL);
}
@@ -1952,8 +1954,6 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
/* Now send the (possibly) fragmented message. */
list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
- sctp_chunk_hold(chunk);
-
/* Do accounting for the write space. */
sctp_set_owner_w(chunk);
@@ -1966,15 +1966,13 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
* breaks.
*/
err = sctp_primitive_SEND(net, asoc, datamsg);
+ sctp_datamsg_put(datamsg);
/* Did the lower layer accept the chunk? */
- if (err) {
- sctp_datamsg_free(datamsg);
+ if (err)
goto out_free;
- }
pr_debug("%s: we sent primitively\n", __func__);
- sctp_datamsg_put(datamsg);
err = msg_len;
if (unlikely(wait_connect)) {
@@ -4928,7 +4926,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
to = optval + offsetof(struct sctp_getaddrs, addrs);
space_left = len - offsetof(struct sctp_getaddrs, addrs);
- addrs = kmalloc(space_left, GFP_KERNEL);
+ addrs = kmalloc(space_left, GFP_USER | __GFP_NOWARN);
if (!addrs)
return -ENOMEM;
@@ -5777,7 +5775,7 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len,
len = sizeof(struct sctp_assoc_ids) + sizeof(sctp_assoc_t) * num;
- ids = kmalloc(len, GFP_KERNEL);
+ ids = kmalloc(len, GFP_USER | __GFP_NOWARN);
if (unlikely(!ids))
return -ENOMEM;
@@ -6458,7 +6456,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (sctp_writeable(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else {
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
/*
* Since the socket is not locked, the buffer
* might be made available after the writeable check and
@@ -6801,26 +6799,30 @@ no_packet:
static void __sctp_write_space(struct sctp_association *asoc)
{
struct sock *sk = asoc->base.sk;
- struct socket *sock = sk->sk_socket;
- if ((sctp_wspace(asoc) > 0) && sock) {
- if (waitqueue_active(&asoc->wait))
- wake_up_interruptible(&asoc->wait);
+ if (sctp_wspace(asoc) <= 0)
+ return;
- if (sctp_writeable(sk)) {
- wait_queue_head_t *wq = sk_sleep(sk);
+ if (waitqueue_active(&asoc->wait))
+ wake_up_interruptible(&asoc->wait);
- if (wq && waitqueue_active(wq))
- wake_up_interruptible(wq);
+ if (sctp_writeable(sk)) {
+ struct socket_wq *wq;
+
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq) {
+ if (waitqueue_active(&wq->wait))
+ wake_up_interruptible(&wq->wait);
/* Note that we try to include the Async I/O support
* here by modeling from the current TCP/UDP code.
* We have not tested with it yet.
*/
if (!(sk->sk_shutdown & SEND_SHUTDOWN))
- sock_wake_async(sock,
- SOCK_WAKE_SPACE, POLL_OUT);
+ sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
}
+ rcu_read_unlock();
}
}
@@ -7163,6 +7165,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newsk->sk_type = sk->sk_type;
newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
newsk->sk_flags = sk->sk_flags;
+ newsk->sk_tsflags = sk->sk_tsflags;
newsk->sk_no_check_tx = sk->sk_no_check_tx;
newsk->sk_no_check_rx = sk->sk_no_check_rx;
newsk->sk_reuse = sk->sk_reuse;
@@ -7195,6 +7198,11 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newinet->mc_ttl = 1;
newinet->mc_index = 0;
newinet->mc_list = NULL;
+
+ if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+ net_enable_timestamp();
+
+ security_sk_clone(sk, newsk);
}
static inline void sctp_copy_descendant(struct sock *sk_to,
@@ -7375,6 +7383,13 @@ struct proto sctp_prot = {
#if IS_ENABLED(CONFIG_IPV6)
+#include <net/transp_v6.h>
+static void sctp_v6_destroy_sock(struct sock *sk)
+{
+ sctp_destroy_sock(sk);
+ inet6_destroy_sock(sk);
+}
+
struct proto sctpv6_prot = {
.name = "SCTPv6",
.owner = THIS_MODULE,
@@ -7384,7 +7399,7 @@ struct proto sctpv6_prot = {
.accept = sctp_accept,
.ioctl = sctp_ioctl,
.init = sctp_init_sock,
- .destroy = sctp_destroy_sock,
+ .destroy = sctp_v6_destroy_sock,
.shutdown = sctp_shutdown,
.setsockopt = sctp_setsockopt,
.getsockopt = sctp_getsockopt,
diff --git a/net/socket.c b/net/socket.c
index dd2c247c99e3..d730ef9dfbf0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -257,6 +257,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
}
init_waitqueue_head(&wq->wait);
wq->fasync_list = NULL;
+ wq->flags = 0;
RCU_INIT_POINTER(ei->socket.wq, wq);
ei->socket.state = SS_UNCONNECTED;
@@ -1056,27 +1057,20 @@ static int sock_fasync(int fd, struct file *filp, int on)
return 0;
}
-/* This function may be called only under socket lock or callback_lock or rcu_lock */
+/* This function may be called only under rcu_lock */
-int sock_wake_async(struct socket *sock, int how, int band)
+int sock_wake_async(struct socket_wq *wq, int how, int band)
{
- struct socket_wq *wq;
-
- if (!sock)
- return -1;
- rcu_read_lock();
- wq = rcu_dereference(sock->wq);
- if (!wq || !wq->fasync_list) {
- rcu_read_unlock();
+ if (!wq || !wq->fasync_list)
return -1;
- }
+
switch (how) {
case SOCK_WAKE_WAITD:
- if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
+ if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
break;
goto call_kill;
case SOCK_WAKE_SPACE:
- if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
+ if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
break;
/* fall through */
case SOCK_WAKE_IO:
@@ -1086,7 +1080,7 @@ call_kill:
case SOCK_WAKE_URG:
kill_fasync(&wq->fasync_list, SIGURG, band);
}
- rcu_read_unlock();
+
return 0;
}
EXPORT_SYMBOL(sock_wake_async);
@@ -1702,6 +1696,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
/* We assume all kernel code knows the size of sockaddr_storage */
msg.msg_namelen = 0;
+ msg.msg_iocb = NULL;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index f14f24ee9983..73ad57a59989 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -250,11 +250,11 @@ void rpc_destroy_wait_queue(struct rpc_wait_queue *queue)
}
EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
-static int rpc_wait_bit_killable(struct wait_bit_key *key)
+static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
{
- if (fatal_signal_pending(current))
- return -ERESTARTSYS;
freezable_schedule_unsafe();
+ if (signal_pending_state(mode, current))
+ return -ERESTARTSYS;
return 0;
}
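rpc_wait_bit_killable() now sleeps first and tests signals afterwards with signal_pending_state(mode, current), so the check honours the sleep mode (only fatal signals interrupt a TASK_KILLABLE wait) and runs after the schedule rather than racing ahead of it. A sketch of such a wait_on_bit() action callback, minus the freezer interaction:

    #include <linux/errno.h>
    #include <linux/sched.h>
    #include <linux/wait.h>

    /* Passed to wait_on_bit_action(word, bit, fn, TASK_KILLABLE). */
    static int example_wait_bit(struct wait_bit_key *key, int mode)
    {
            schedule();
            /* mode decides which pending signals abort the wait */
            if (signal_pending_state(mode, current))
                    return -ERESTARTSYS;
            return 0;
    }
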
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bc5b7b5032ca..cc9852897395 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1364,6 +1364,19 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg));
memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res));
+ /* Adjust the argument buffer length */
+ rqstp->rq_arg.len = req->rq_private_buf.len;
+ if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
+ rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
+ rqstp->rq_arg.page_len = 0;
+ } else if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len +
+ rqstp->rq_arg.page_len)
+ rqstp->rq_arg.page_len = rqstp->rq_arg.len -
+ rqstp->rq_arg.head[0].iov_len;
+ else
+ rqstp->rq_arg.len = rqstp->rq_arg.head[0].iov_len +
+ rqstp->rq_arg.page_len;
+
/* reset result send buffer "put" position */
resv->iov_len = 0;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 1d1a70498910..2ffaf6a79499 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -398,7 +398,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
if (unlikely(!sock))
return -ENOTSOCK;
- clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
+ clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags);
if (base != 0) {
addr = NULL;
addrlen = 0;
@@ -442,7 +442,7 @@ static void xs_nospace_callback(struct rpc_task *task)
struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
transport->inet->sk_write_pending--;
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+ clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
}
/**
@@ -467,7 +467,7 @@ static int xs_nospace(struct rpc_task *task)
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
- if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
+ if (test_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags)) {
/*
* Notify TCP that we're limited by the application
* window size
@@ -478,7 +478,7 @@ static int xs_nospace(struct rpc_task *task)
xprt_wait_for_buffer_space(task, xs_nospace_callback);
}
} else {
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+ clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
ret = -ENOTCONN;
}
@@ -626,7 +626,7 @@ process_status:
case -EPERM:
/* When the server has died, an ICMP port unreachable message
* prompts ECONNREFUSED. */
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+ clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
}
return status;
@@ -715,7 +715,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
case -EADDRINUSE:
case -ENOBUFS:
case -EPIPE:
- clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+ clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
}
return status;
@@ -1618,7 +1618,7 @@ static void xs_write_space(struct sock *sk)
if (unlikely(!(xprt = xprt_from_sock(sk))))
return;
- if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
+ if (test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags) == 0)
return;
xprt_write_space(xprt);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 9efbdbde2b08..91aea071ab27 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -191,6 +191,7 @@ void tipc_link_add_bc_peer(struct tipc_link *snd_l,
snd_l->ackers++;
rcv_l->acked = snd_l->snd_nxt - 1;
+ snd_l->state = LINK_ESTABLISHED;
tipc_link_build_bc_init_msg(uc_l, xmitq);
}
@@ -206,6 +207,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
rcv_l->state = LINK_RESET;
if (!snd_l->ackers) {
tipc_link_reset(snd_l);
+ snd_l->state = LINK_RESET;
__skb_queue_purge(xmitq);
}
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 552dbaba9cf3..b53246fb0412 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -105,6 +105,7 @@ struct tipc_sock {
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
+static void tipc_sock_destruct(struct sock *sk);
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p);
@@ -381,6 +382,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
sk->sk_rcvbuf = sysctl_tipc_rmem[1];
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
+ sk->sk_destruct = tipc_sock_destruct;
tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
tsk->sent_unacked = 0;
atomic_set(&tsk->dupl_rcvcnt, 0);
@@ -470,9 +472,6 @@ static int tipc_release(struct socket *sock)
tipc_node_remove_conn(net, dnode, tsk->portid);
}
- /* Discard any remaining (connection-based) messages in receive queue */
- __skb_queue_purge(&sk->sk_receive_queue);
-
/* Reject any messages that accumulated in backlog queue */
sock->state = SS_DISCONNECTING;
release_sock(sk);
@@ -1515,6 +1514,11 @@ static void tipc_data_ready(struct sock *sk)
rcu_read_unlock();
}
+static void tipc_sock_destruct(struct sock *sk)
+{
+ __skb_queue_purge(&sk->sk_receive_queue);
+}
+
/**
* filter_connect - Handle all incoming messages for a connection-based socket
* @tsk: TIPC socket
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ad2719ad4c1b..70c03271b798 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -158,8 +158,11 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
struct rtable *rt;
- if (skb_headroom(skb) < UDP_MIN_HEADROOM)
- pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+ if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
+ err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+ if (err)
+ goto tx_error;
+ }
skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
ub = rcu_dereference_rtnl(b->media_ptr);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index aaa0b58d6aba..ef05cd9403d4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -326,6 +326,118 @@ found:
return s;
}
+/* Support code for asymmetrically connected dgram sockets
+ *
+ * If a datagram socket is connected to a socket that is not itself
+ * connected to the first socket (e.g., /dev/log), clients may only
+ * enqueue more messages if the present receive queue of the server
+ * socket is not "too large". This means there is a second
+ * writeability condition that poll and sendmsg need to test. Upon
+ * reception of a datagram, the dgram recv code does a wake-up on the
+ * peer_wait wait queue of the receiving socket; this wake-up needs to
+ * be propagated to sleeping would-be writers, since these might not
+ * have sent anything so far. This can't be accomplished via
+ * poll_wait, because the lifetime of the server socket might be less
+ * than that of its clients if these break their association with it
+ * or if the server socket is closed while clients are still connected
+ * to it, and there is no way to inform a polling implementation that
+ * it should let go of a certain wait queue.
+ *
+ * In order to propagate a wake-up, a wait_queue_t of the client
+ * socket is enqueued on the peer_wait queue of the server socket; its
+ * wake function does a wake_up on the ordinary client socket wait
+ * queue. This connection is established whenever a write (or poll for
+ * write) hits the flow control condition, and broken when the
+ * association to the server socket is dissolved or after a wake-up
+ * was relayed.
+ */
+
+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
+ void *key)
+{
+ struct unix_sock *u;
+ wait_queue_head_t *u_sleep;
+
+ u = container_of(q, struct unix_sock, peer_wake);
+
+ __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
+ q);
+ u->peer_wake.private = NULL;
+
+ /* relaying can only happen while the wq still exists */
+ u_sleep = sk_sleep(&u->sk);
+ if (u_sleep)
+ wake_up_interruptible_poll(u_sleep, key);
+
+ return 0;
+}
+
+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
+{
+ struct unix_sock *u, *u_other;
+ int rc;
+
+ u = unix_sk(sk);
+ u_other = unix_sk(other);
+ rc = 0;
+ spin_lock(&u_other->peer_wait.lock);
+
+ if (!u->peer_wake.private) {
+ u->peer_wake.private = other;
+ __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
+
+ rc = 1;
+ }
+
+ spin_unlock(&u_other->peer_wait.lock);
+ return rc;
+}
+
+static void unix_dgram_peer_wake_disconnect(struct sock *sk,
+ struct sock *other)
+{
+ struct unix_sock *u, *u_other;
+
+ u = unix_sk(sk);
+ u_other = unix_sk(other);
+ spin_lock(&u_other->peer_wait.lock);
+
+ if (u->peer_wake.private == other) {
+ __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
+ u->peer_wake.private = NULL;
+ }
+
+ spin_unlock(&u_other->peer_wait.lock);
+}
+
+static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
+ struct sock *other)
+{
+ unix_dgram_peer_wake_disconnect(sk, other);
+ wake_up_interruptible_poll(sk_sleep(sk),
+ POLLOUT |
+ POLLWRNORM |
+ POLLWRBAND);
+}
+
+/* preconditions:
+ * - unix_peer(sk) == other
+ * - association is stable
+ */
+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
+{
+ int connected;
+
+ connected = unix_dgram_peer_wake_connect(sk, other);
+
+ if (unix_recvq_full(other))
+ return 1;
+
+ if (connected)
+ unix_dgram_peer_wake_disconnect(sk, other);
+
+ return 0;
+}
+
static int unix_writable(const struct sock *sk)
{
return sk->sk_state != TCP_LISTEN &&
@@ -431,6 +543,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
skpair->sk_state_change(skpair);
sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
}
+
+ unix_dgram_peer_wake_disconnect(sk, skpair);
sock_put(skpair); /* It may now die */
unix_peer(sk) = NULL;
}
@@ -441,6 +555,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
if (state == TCP_LISTEN)
unix_release_sock(skb->sk, 1);
/* passed fds are erased in the kfree_skb hook */
+ UNIXCB(skb).consumed = skb->len;
kfree_skb(skb);
}
@@ -665,6 +780,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
INIT_LIST_HEAD(&u->link);
mutex_init(&u->readlock); /* single task reading lock */
init_waitqueue_head(&u->peer_wait);
+ init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
if (sk == NULL)
@@ -837,32 +953,20 @@ fail:
return NULL;
}
-static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
+ struct path *res)
{
- struct dentry *dentry;
- struct path path;
- int err = 0;
- /*
- * Get the parent directory, calculate the hash for last
- * component.
- */
- dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
- err = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- return err;
+ int err;
- /*
- * All right, let's create it.
- */
- err = security_path_mknod(&path, dentry, mode, 0);
+ err = security_path_mknod(path, dentry, mode, 0);
if (!err) {
- err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
+ err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
if (!err) {
- res->mnt = mntget(path.mnt);
+ res->mnt = mntget(path->mnt);
res->dentry = dget(dentry);
}
}
- done_path_create(&path, dentry);
+
return err;
}
@@ -873,10 +977,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct unix_sock *u = unix_sk(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
char *sun_path = sunaddr->sun_path;
- int err;
+ int err, name_err;
unsigned int hash;
struct unix_address *addr;
struct hlist_head *list;
+ struct path path;
+ struct dentry *dentry;
err = -EINVAL;
if (sunaddr->sun_family != AF_UNIX)
@@ -892,14 +998,34 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
addr_len = err;
+ name_err = 0;
+ dentry = NULL;
+ if (sun_path[0]) {
+ /* Get the parent directory, calculate the hash for last
+ * component.
+ */
+ dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+
+ if (IS_ERR(dentry)) {
+ /* delay report until after 'already bound' check */
+ name_err = PTR_ERR(dentry);
+ dentry = NULL;
+ }
+ }
+
err = mutex_lock_interruptible(&u->readlock);
if (err)
- goto out;
+ goto out_path;
err = -EINVAL;
if (u->addr)
goto out_up;
+ if (name_err) {
+ err = name_err == -EEXIST ? -EADDRINUSE : name_err;
+ goto out_up;
+ }
+
err = -ENOMEM;
addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
if (!addr)
@@ -910,11 +1036,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
addr->hash = hash ^ sk->sk_type;
atomic_set(&addr->refcnt, 1);
- if (sun_path[0]) {
- struct path path;
+ if (dentry) {
+ struct path u_path;
umode_t mode = S_IFSOCK |
(SOCK_INODE(sock)->i_mode & ~current_umask());
- err = unix_mknod(sun_path, mode, &path);
+ err = unix_mknod(dentry, &path, mode, &u_path);
if (err) {
if (err == -EEXIST)
err = -EADDRINUSE;
@@ -922,9 +1048,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out_up;
}
addr->hash = UNIX_HASH_SIZE;
- hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
+ hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
spin_lock(&unix_table_lock);
- u->path = path;
+ u->path = u_path;
list = &unix_socket_table[hash];
} else {
spin_lock(&unix_table_lock);
@@ -947,6 +1073,10 @@ out_unlock:
spin_unlock(&unix_table_lock);
out_up:
mutex_unlock(&u->readlock);
+out_path:
+ if (dentry)
+ done_path_create(&path, dentry);
+
out:
return err;
}
@@ -1032,6 +1162,8 @@ restart:
if (unix_peer(sk)) {
struct sock *old_peer = unix_peer(sk);
unix_peer(sk) = other;
+ unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
+
unix_state_double_unlock(sk, other);
if (other != old_peer)
@@ -1433,6 +1565,14 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
return err;
}
+static bool unix_passcred_enabled(const struct socket *sock,
+ const struct sock *other)
+{
+ return test_bit(SOCK_PASSCRED, &sock->flags) ||
+ !other->sk_socket ||
+ test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
+}
+
/*
* Some apps rely on write() giving SCM_CREDENTIALS.
* We include credentials if source or destination socket
@@ -1443,14 +1583,41 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
{
if (UNIXCB(skb).pid)
return;
- if (test_bit(SOCK_PASSCRED, &sock->flags) ||
- !other->sk_socket ||
- test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
+ if (unix_passcred_enabled(sock, other)) {
UNIXCB(skb).pid = get_pid(task_tgid(current));
current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
}
}
+static int maybe_init_creds(struct scm_cookie *scm,
+ struct socket *socket,
+ const struct sock *other)
+{
+ int err;
+ struct msghdr msg = { .msg_controllen = 0 };
+
+ err = scm_send(socket, &msg, scm, false);
+ if (err)
+ return err;
+
+ if (unix_passcred_enabled(socket, other)) {
+ scm->pid = get_pid(task_tgid(current));
+ current_uid_gid(&scm->creds.uid, &scm->creds.gid);
+ }
+ return err;
+}
+
+static bool unix_skb_scm_eq(struct sk_buff *skb,
+ struct scm_cookie *scm)
+{
+ const struct unix_skb_parms *u = &UNIXCB(skb);
+
+ return u->pid == scm->pid &&
+ uid_eq(u->uid, scm->creds.uid) &&
+ gid_eq(u->gid, scm->creds.gid) &&
+ unix_secdata_eq(scm, skb);
+}
+
/*
* Send AF_UNIX data.
*/
@@ -1471,6 +1638,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
struct scm_cookie scm;
int max_level;
int data_len = 0;
+ int sk_locked;
wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
@@ -1549,12 +1717,14 @@ restart:
goto out_free;
}
+ sk_locked = 0;
unix_state_lock(other);
+restart_locked:
err = -EPERM;
if (!unix_may_send(sk, other))
goto out_unlock;
- if (sock_flag(other, SOCK_DEAD)) {
+ if (unlikely(sock_flag(other, SOCK_DEAD))) {
/*
* Check with 1003.1g - what should
* datagram error
@@ -1562,10 +1732,14 @@ restart:
unix_state_unlock(other);
sock_put(other);
+ if (!sk_locked)
+ unix_state_lock(sk);
+
err = 0;
- unix_state_lock(sk);
if (unix_peer(sk) == other) {
unix_peer(sk) = NULL;
+ unix_dgram_peer_wake_disconnect_wakeup(sk, other);
+
unix_state_unlock(sk);
unix_dgram_disconnected(sk, other);
@@ -1591,21 +1765,38 @@ restart:
goto out_unlock;
}
- if (unix_peer(other) != sk && unix_recvq_full(other)) {
- if (!timeo) {
- err = -EAGAIN;
- goto out_unlock;
+ if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+ if (timeo) {
+ timeo = unix_wait_for_peer(other, timeo);
+
+ err = sock_intr_errno(timeo);
+ if (signal_pending(current))
+ goto out_free;
+
+ goto restart;
}
- timeo = unix_wait_for_peer(other, timeo);
+ if (!sk_locked) {
+ unix_state_unlock(other);
+ unix_state_double_lock(sk, other);
+ }
- err = sock_intr_errno(timeo);
- if (signal_pending(current))
- goto out_free;
+ if (unix_peer(sk) != other ||
+ unix_dgram_peer_wake_me(sk, other)) {
+ err = -EAGAIN;
+ sk_locked = 1;
+ goto out_unlock;
+ }
- goto restart;
+ if (!sk_locked) {
+ sk_locked = 1;
+ goto restart_locked;
+ }
}
+ if (unlikely(sk_locked))
+ unix_state_unlock(sk);
+
if (sock_flag(other, SOCK_RCVTSTAMP))
__net_timestamp(skb);
maybe_add_creds(skb, sock, other);
@@ -1619,6 +1810,8 @@ restart:
return len;
out_unlock:
+ if (sk_locked)
+ unix_state_unlock(sk);
unix_state_unlock(other);
out_free:
kfree_skb(skb);
@@ -1740,8 +1933,10 @@ out_err:
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
int offset, size_t size, int flags)
{
- int err = 0;
- bool send_sigpipe = true;
+ int err;
+ bool send_sigpipe = false;
+ bool init_scm = true;
+ struct scm_cookie scm;
struct sock *other, *sk = socket->sk;
struct sk_buff *skb, *newskb = NULL, *tail = NULL;
@@ -1759,7 +1954,7 @@ alloc_skb:
newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
&err, 0);
if (!newskb)
- return err;
+ goto err;
}
/* we must acquire readlock as we modify already present
@@ -1768,12 +1963,12 @@ alloc_skb:
err = mutex_lock_interruptible(&unix_sk(other)->readlock);
if (err) {
err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
- send_sigpipe = false;
goto err;
}
if (sk->sk_shutdown & SEND_SHUTDOWN) {
err = -EPIPE;
+ send_sigpipe = true;
goto err_unlock;
}
@@ -1782,23 +1977,34 @@ alloc_skb:
if (sock_flag(other, SOCK_DEAD) ||
other->sk_shutdown & RCV_SHUTDOWN) {
err = -EPIPE;
+ send_sigpipe = true;
goto err_state_unlock;
}
+ if (init_scm) {
+ err = maybe_init_creds(&scm, socket, other);
+ if (err)
+ goto err_state_unlock;
+ init_scm = false;
+ }
+
skb = skb_peek_tail(&other->sk_receive_queue);
if (tail && tail == skb) {
skb = newskb;
- } else if (!skb) {
- if (newskb)
+ } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
+ if (newskb) {
skb = newskb;
- else
+ } else {
+ tail = skb;
goto alloc_skb;
+ }
} else if (newskb) {
/* this is the fast path; we don't strictly need to
 * call kfree_skb() here, and with newskb == NULL
 * it would do no harm anyway
 */
consume_skb(newskb);
+ newskb = NULL;
}
if (skb_append_pagefrags(skb, page, offset, size)) {
@@ -1811,14 +2017,20 @@ alloc_skb:
skb->truesize += size;
atomic_add(size, &sk->sk_wmem_alloc);
- if (newskb)
+ if (newskb) {
+ err = unix_scm_to_skb(&scm, skb, false);
+ if (err)
+ goto err_state_unlock;
+ spin_lock(&other->sk_receive_queue.lock);
__skb_queue_tail(&other->sk_receive_queue, newskb);
+ spin_unlock(&other->sk_receive_queue.lock);
+ }
unix_state_unlock(other);
mutex_unlock(&unix_sk(other)->readlock);
other->sk_data_ready(other);
-
+ scm_destroy(&scm);
return size;
err_state_unlock:
@@ -1829,6 +2041,8 @@ err:
kfree_skb(newskb);
if (send_sigpipe && !(flags & MSG_NOSIGNAL))
send_sig(SIGPIPE, current, 0);
+ if (!init_scm)
+ scm_destroy(&scm);
return err;
}
@@ -1991,7 +2205,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
!timeo)
break;
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
unix_state_unlock(sk);
timeo = freezable_schedule_timeout(timeo);
unix_state_lock(sk);
@@ -1999,7 +2213,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
if (sock_flag(sk, SOCK_DEAD))
break;
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
}
finish_wait(sk_sleep(sk), &wait);
@@ -2056,14 +2270,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
/* Lock the socket to prevent queue disordering
* while sleeps in memcpy_tomsg
*/
- err = mutex_lock_interruptible(&u->readlock);
- if (unlikely(err)) {
- /* recvmsg() in non blocking mode is supposed to return -EAGAIN
- * sk_rcvtimeo is not honored by mutex_lock_interruptible()
- */
- err = noblock ? -EAGAIN : -ERESTARTSYS;
- goto out;
- }
+ mutex_lock(&u->readlock);
if (flags & MSG_PEEK)
skip = sk_peek_offset(sk, flags);
@@ -2072,6 +2279,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
do {
int chunk;
+ bool drop_skb;
struct sk_buff *skb, *last;
unix_state_lock(sk);
@@ -2106,12 +2314,12 @@ again:
timeo = unix_stream_data_wait(sk, timeo, last,
last_len);
- if (signal_pending(current) ||
- mutex_lock_interruptible(&u->readlock)) {
+ if (signal_pending(current)) {
err = sock_intr_errno(timeo);
goto out;
}
+ mutex_lock(&u->readlock);
continue;
unlock:
unix_state_unlock(sk);
@@ -2131,10 +2339,7 @@ unlock:
if (check_creds) {
/* Never glue messages from different writers */
- if ((UNIXCB(skb).pid != scm.pid) ||
- !uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
- !gid_eq(UNIXCB(skb).gid, scm.creds.gid) ||
- !unix_secdata_eq(&scm, skb))
+ if (!unix_skb_scm_eq(skb, &scm))
break;
} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
/* Copy credentials */
@@ -2152,7 +2357,11 @@ unlock:
}
chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
+ skb_get(skb);
chunk = state->recv_actor(skb, skip, chunk, state);
+ drop_skb = !unix_skb_len(skb);
+ /* skb is only safe to use if !drop_skb */
+ consume_skb(skb);
if (chunk < 0) {
if (copied == 0)
copied = -EFAULT;
@@ -2161,6 +2370,18 @@ unlock:
copied += chunk;
size -= chunk;
+ if (drop_skb) {
+			/* the skb was touched by a concurrent reader;
+			 * we should not expect anything from this skb
+			 * anymore and must assume it invalid, since we
+			 * can be sure it was dropped from the socket
+			 * queue
+			 *
+			 * let's report a short read
+			 */
+ err = 0;
+ break;
+ }
+
/* Mark read part of skb as used */
if (!(flags & MSG_PEEK)) {
UNIXCB(skb).consumed += chunk;
@@ -2454,20 +2675,22 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
return mask;
writable = unix_writable(sk);
- other = unix_peer_get(sk);
- if (other) {
- if (unix_peer(other) != sk) {
- sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
- if (unix_recvq_full(other))
- writable = 0;
- }
- sock_put(other);
+ if (writable) {
+ unix_state_lock(sk);
+
+ other = unix_peer(sk);
+ if (other && unix_peer(other) != sk &&
+ unix_recvq_full(other) &&
+ unix_dgram_peer_wake_me(sk, other))
+ writable = 0;
+
+ unix_state_unlock(sk);
}
if (writable)
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
return mask;
}
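
The af_unix changes above implement the second writeability condition described in the comment block near the top of this file's diff. As a hedged userspace illustration (the abstract address "@wake-demo" and all names here are mine, not part of the patch), the following program fills an unconnected receiver's queue from a connected datagram sender and then polls for POLLOUT; with the patch applied, poll() reports the sender as not writable instead of returning a spurious POLLOUT:

#include <poll.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

int main(void)
{
	int srv = socket(AF_UNIX, SOCK_DGRAM, 0);
	int cli = socket(AF_UNIX, SOCK_DGRAM, 0);
	struct sockaddr_un a = { .sun_family = AF_UNIX };
	socklen_t alen = offsetof(struct sockaddr_un, sun_path) + 10;
	struct pollfd pfd = { .events = POLLOUT };
	char buf[512] = { 0 };

	memcpy(a.sun_path, "\0wake-demo", 10);	/* abstract address */
	bind(srv, (struct sockaddr *)&a, alen);
	connect(cli, (struct sockaddr *)&a, alen);

	/* srv is not connected back to cli, so the second writeability
	 * condition applies; fill srv's receive queue */
	while (send(cli, buf, sizeof(buf), MSG_DONTWAIT) > 0)
		;

	pfd.fd = cli;
	/* expect 0 (not writable) once the peer's queue is full */
	printf("POLLOUT ready: %d\n", poll(&pfd, 1, 0));

	close(cli);
	close(srv);
	return 0;
}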
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c71e274c810a..75b0d23ee882 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7941,8 +7941,10 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) {
if (!(rdev->wiphy.features &
NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) ||
- !(rdev->wiphy.features & NL80211_FEATURE_QUIET))
+ !(rdev->wiphy.features & NL80211_FEATURE_QUIET)) {
+ kzfree(connkeys);
return -EINVAL;
+ }
connect.flags |= ASSOC_REQ_USE_RRM;
}
@@ -9503,6 +9505,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
if (new_triggers.tcp && new_triggers.tcp->sock)
sock_release(new_triggers.tcp->sock);
kfree(new_triggers.tcp);
+ kfree(new_triggers.nd_config);
return err;
}
#endif
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2e8d6f39ed56..06d050da0d94 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -3029,6 +3029,7 @@ int set_regdom(const struct ieee80211_regdomain *rd,
break;
default:
WARN(1, "invalid initiator %d\n", lr->initiator);
+ kfree(rd);
return -EINVAL;
}
@@ -3221,8 +3222,10 @@ int __init regulatory_init(void)
/* We always try to get an update for the static regdomain */
err = regulatory_hint_core(cfg80211_world_regdom->alpha2);
if (err) {
- if (err == -ENOMEM)
+ if (err == -ENOMEM) {
+ platform_device_unregister(reg_pdev);
return err;
+ }
/*
* N.B. kobject_uevent_env() can fail, mainly when we're out of
* memory, which is handled and propagated appropriately above
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 09bfcbac63bb..b5e665b3cfb0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -303,6 +303,14 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
}
EXPORT_SYMBOL(xfrm_policy_alloc);
+static void xfrm_policy_destroy_rcu(struct rcu_head *head)
+{
+ struct xfrm_policy *policy = container_of(head, struct xfrm_policy, rcu);
+
+ security_xfrm_policy_free(policy->security);
+ kfree(policy);
+}
+
/* Destroy xfrm_policy: descendant resources must be released to this moment. */
void xfrm_policy_destroy(struct xfrm_policy *policy)
@@ -312,8 +320,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
BUG();
- security_xfrm_policy_free(policy->security);
- kfree(policy);
+ call_rcu(&policy->rcu, xfrm_policy_destroy_rcu);
}
EXPORT_SYMBOL(xfrm_policy_destroy);
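
The hunk above converts a direct free into an RCU-deferred one, so that the lockless readers introduced below (rcu_dereference() of sk->sk_policy) can never observe a policy being freed under them. A minimal kernel-style sketch of the pattern, with hypothetical names and not a standalone program:

#include <linux/rcupdate.h>
#include <linux/slab.h>

/* "item" and its helpers are hypothetical */
struct item {
	struct rcu_head rcu;
	/* ... payload ... */
};

static void item_free_rcu(struct rcu_head *head)
{
	struct item *it = container_of(head, struct item, rcu);

	kfree(it);	/* runs only after pre-existing RCU readers finish */
}

static void item_destroy(struct item *it)
{
	/* unpublish first, e.g. rcu_assign_pointer(slot, NULL), then
	 * hand the memory to RCU instead of freeing it directly */
	call_rcu(&it->rcu, item_free_rcu);
}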
@@ -1214,8 +1221,10 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
struct xfrm_policy *pol;
struct net *net = sock_net(sk);
+ rcu_read_lock();
read_lock_bh(&net->xfrm.xfrm_policy_lock);
- if ((pol = sk->sk_policy[dir]) != NULL) {
+ pol = rcu_dereference(sk->sk_policy[dir]);
+ if (pol != NULL) {
bool match = xfrm_selector_match(&pol->selector, fl,
sk->sk_family);
int err = 0;
@@ -1239,6 +1248,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
}
out:
read_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ rcu_read_unlock();
return pol;
}
@@ -1307,13 +1317,14 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
#endif
write_lock_bh(&net->xfrm.xfrm_policy_lock);
- old_pol = sk->sk_policy[dir];
- sk->sk_policy[dir] = pol;
+ old_pol = rcu_dereference_protected(sk->sk_policy[dir],
+ lockdep_is_held(&net->xfrm.xfrm_policy_lock));
if (pol) {
pol->curlft.add_time = get_seconds();
pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
xfrm_sk_policy_link(pol, dir);
}
+ rcu_assign_pointer(sk->sk_policy[dir], pol);
if (old_pol) {
if (pol)
xfrm_policy_requeue(old_pol, pol);
@@ -1361,17 +1372,26 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
return newp;
}
-int __xfrm_sk_clone_policy(struct sock *sk)
+int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
{
- struct xfrm_policy *p0 = sk->sk_policy[0],
- *p1 = sk->sk_policy[1];
+ const struct xfrm_policy *p;
+ struct xfrm_policy *np;
+ int i, ret = 0;
- sk->sk_policy[0] = sk->sk_policy[1] = NULL;
- if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
- return -ENOMEM;
- if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
- return -ENOMEM;
- return 0;
+ rcu_read_lock();
+ for (i = 0; i < 2; i++) {
+ p = rcu_dereference(osk->sk_policy[i]);
+ if (p) {
+ np = clone_policy(p, i);
+ if (unlikely(!np)) {
+ ret = -ENOMEM;
+ break;
+ }
+ rcu_assign_pointer(sk->sk_policy[i], np);
+ }
+ }
+ rcu_read_unlock();
+ return ret;
}
static int
@@ -2198,6 +2218,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
xdst = NULL;
route = NULL;
+ sk = sk_const_to_full_sk(sk);
if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
num_pols = 1;
pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
@@ -2477,6 +2498,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
pol = NULL;
+ sk = sk_to_full_sk(sk);
if (sk && sk->sk_policy[dir]) {
pol = xfrm_sk_policy_lookup(sk, dir, &fl);
if (IS_ERR(pol)) {
@@ -2804,7 +2826,6 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
- struct net *net;
int err = 0;
if (unlikely(afinfo == NULL))
return -EINVAL;
@@ -2835,26 +2856,6 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
}
spin_unlock(&xfrm_policy_afinfo_lock);
- rtnl_lock();
- for_each_net(net) {
- struct dst_ops *xfrm_dst_ops;
-
- switch (afinfo->family) {
- case AF_INET:
- xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
- break;
-#if IS_ENABLED(CONFIG_IPV6)
- case AF_INET6:
- xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
- break;
-#endif
- default:
- BUG();
- }
- *xfrm_dst_ops = *afinfo->dst_ops;
- }
- rtnl_unlock();
-
return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);
@@ -2890,22 +2891,6 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
-static void __net_init xfrm_dst_ops_init(struct net *net)
-{
- struct xfrm_policy_afinfo *afinfo;
-
- rcu_read_lock();
- afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
- if (afinfo)
- net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
-#if IS_ENABLED(CONFIG_IPV6)
- afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
- if (afinfo)
- net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
-#endif
- rcu_read_unlock();
-}
-
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
@@ -3054,7 +3039,6 @@ static int __net_init xfrm_net_init(struct net *net)
rv = xfrm_policy_init(net);
if (rv < 0)
goto out_policy;
- xfrm_dst_ops_init(net);
rv = xfrm_sysctl_init(net);
if (rv < 0)
goto out_sysctl;
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 79b4596b5f9a..edd638b5825f 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -67,10 +67,13 @@ HOSTLOADLIBES_lathist += -lelf
# point this to your LLVM backend with bpf support
LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
+# The inline assembly in asm/sysreg.h is incompatible with llvm.
+# There is no easy way to fix it, so just exclude the header, since
+# it is useless for BPF samples anyway.
$(obj)/%.o: $(src)/%.c
clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
- -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
- -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=asm -o $@.s
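
The -D__ASM_SYSREG_H define works because pre-defining a header's include guard makes the preprocessor skip the header body entirely. A standalone illustration of the trick (the guard name INCOMPAT_H is made up):

#include <stdio.h>

#define INCOMPAT_H	/* stands in for -DINCOMPAT_H on the command line */

/* a header body guarded the usual way; since its guard macro is
 * already defined, the preprocessor skips everything inside */
#ifndef INCOMPAT_H
#define INCOMPAT_H
#error "incompatible inline assembly would have been compiled here"
#endif

int main(void)
{
	puts("header body skipped via its own include guard");
	return 0;
}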
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 125b906cd1d4..638a38e1b419 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -2711,7 +2711,7 @@ $kernelversion = get_kernel_version();
# generate a sequence of code that will splice in highlighting information
# using the s// operator.
-foreach my $k (keys @highlights) {
+for (my $k = 0; $k < @highlights; $k++) {
my $pattern = $highlights[$k][0];
my $result = $highlights[$k][1];
# print STDERR "scanning pattern:$pattern, highlight:($result)\n";
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 1a10d8ac8162..dacf71a43ad4 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -62,7 +62,7 @@ vmlinux_link()
-Wl,--start-group \
${KBUILD_VMLINUX_MAIN} \
-Wl,--end-group \
- -lutil ${1}
+ -lutil -lrt ${1}
rm -f linux
fi
}
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 698768bdc581..e167592793a7 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -48,12 +48,17 @@
static int fd_map; /* File descriptor for file being modified. */
static int mmap_failed; /* Boolean flag. */
-static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */
static char gpfx; /* prefix for global symbol name (sometimes '_') */
static struct stat sb; /* Remember .st_size, etc. */
static jmp_buf jmpenv; /* setjmp/longjmp per-file error escape */
static const char *altmcount; /* alternate mcount symbol name */
static int warn_on_notrace_sect; /* warn when section has mcount not being recorded */
+static void *file_map; /* pointer of the mapped file */
+static void *file_end; /* pointer to the end of the mapped file */
+static int file_updated; /* flag to state file was changed */
+static void *file_ptr; /* current file pointer location */
+static void *file_append; /* added to the end of the file */
+static size_t file_append_size; /* how much is added to end of file */
/* setjmp() return values */
enum {
@@ -67,10 +72,14 @@ static void
cleanup(void)
{
if (!mmap_failed)
- munmap(ehdr_curr, sb.st_size);
+ munmap(file_map, sb.st_size);
else
- free(ehdr_curr);
- close(fd_map);
+ free(file_map);
+ file_map = NULL;
+ free(file_append);
+ file_append = NULL;
+ file_append_size = 0;
+ file_updated = 0;
}
static void __attribute__((noreturn))
@@ -92,12 +101,22 @@ succeed_file(void)
static off_t
ulseek(int const fd, off_t const offset, int const whence)
{
- off_t const w = lseek(fd, offset, whence);
- if (w == (off_t)-1) {
- perror("lseek");
+ switch (whence) {
+ case SEEK_SET:
+ file_ptr = file_map + offset;
+ break;
+ case SEEK_CUR:
+ file_ptr += offset;
+ break;
+ case SEEK_END:
+ file_ptr = file_map + (sb.st_size - offset);
+ break;
+ }
+ if (file_ptr < file_map) {
+ fprintf(stderr, "lseek: seek before file\n");
fail_file();
}
- return w;
+ return file_ptr - file_map;
}
static size_t
@@ -114,12 +133,38 @@ uread(int const fd, void *const buf, size_t const count)
static size_t
uwrite(int const fd, void const *const buf, size_t const count)
{
- size_t const n = write(fd, buf, count);
- if (n != count) {
- perror("write");
- fail_file();
+ size_t cnt = count;
+ off_t idx = 0;
+
+ file_updated = 1;
+
+ if (file_ptr + count >= file_end) {
+ off_t aoffset = (file_ptr + count) - file_end;
+
+ if (aoffset > file_append_size) {
+ file_append = realloc(file_append, aoffset);
+ file_append_size = aoffset;
+ }
+ if (!file_append) {
+ perror("write");
+ fail_file();
+ }
+ if (file_ptr < file_end) {
+ cnt = file_end - file_ptr;
+ } else {
+ cnt = 0;
+ idx = aoffset - count;
+ }
}
- return n;
+
+ if (cnt)
+ memcpy(file_ptr, buf, cnt);
+
+ if (cnt < count)
+ memcpy(file_append + idx, buf + cnt, count - cnt);
+
+ file_ptr += count;
+ return count;
}
static void *
@@ -192,9 +237,7 @@ static int make_nop_arm64(void *map, size_t const offset)
*/
static void *mmap_file(char const *fname)
{
- void *addr;
-
- fd_map = open(fname, O_RDWR);
+ fd_map = open(fname, O_RDONLY);
if (fd_map < 0 || fstat(fd_map, &sb) < 0) {
perror(fname);
fail_file();
@@ -203,15 +246,58 @@ static void *mmap_file(char const *fname)
fprintf(stderr, "not a regular file: %s\n", fname);
fail_file();
}
- addr = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE,
- fd_map, 0);
+ file_map = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE,
+ fd_map, 0);
mmap_failed = 0;
- if (addr == MAP_FAILED) {
+ if (file_map == MAP_FAILED) {
mmap_failed = 1;
- addr = umalloc(sb.st_size);
- uread(fd_map, addr, sb.st_size);
+ file_map = umalloc(sb.st_size);
+ uread(fd_map, file_map, sb.st_size);
+ }
+ close(fd_map);
+
+ file_end = file_map + sb.st_size;
+
+ return file_map;
+}
+
+static void write_file(const char *fname)
+{
+ char tmp_file[strlen(fname) + 4];
+ size_t n;
+
+ if (!file_updated)
+ return;
+
+ sprintf(tmp_file, "%s.rc", fname);
+
+ /*
+	 * After reading the entire file into memory, write the modified
+	 * contents to a temporary file and rename it over the original,
+	 * to prevent weird side effects of modifying an object file in
+	 * place.
+ */
+ fd_map = open(tmp_file, O_WRONLY | O_TRUNC | O_CREAT, sb.st_mode);
+ if (fd_map < 0) {
+ perror(fname);
+ fail_file();
+ }
+ n = write(fd_map, file_map, sb.st_size);
+ if (n != sb.st_size) {
+ perror("write");
+ fail_file();
+ }
+ if (file_append_size) {
+ n = write(fd_map, file_append, file_append_size);
+ if (n != file_append_size) {
+ perror("write");
+ fail_file();
+ }
+ }
+ close(fd_map);
+ if (rename(tmp_file, fname) < 0) {
+ perror(fname);
+ fail_file();
}
- return addr;
}
/* w8rev, w8nat, ...: Handle endianness. */
@@ -318,7 +404,6 @@ do_file(char const *const fname)
Elf32_Ehdr *const ehdr = mmap_file(fname);
unsigned int reltype = 0;
- ehdr_curr = ehdr;
w = w4nat;
w2 = w2nat;
w8 = w8nat;
@@ -441,6 +526,7 @@ do_file(char const *const fname)
}
} /* end switch */
+ write_file(fname);
cleanup();
}
@@ -493,11 +579,14 @@ main(int argc, char *argv[])
case SJ_SETJMP: /* normal sequence */
/* Avoid problems if early cleanup() */
fd_map = -1;
- ehdr_curr = NULL;
mmap_failed = 1;
+ file_map = NULL;
+ file_ptr = NULL;
+ file_updated = 0;
do_file(file);
break;
case SJ_FAIL: /* error in do_file or below */
+ fprintf(stderr, "%s: failed\n", file);
++n_error;
break;
case SJ_SUCCEED: /* premature success */
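
The write_file() added above avoids modifying an object file in place: it writes the updated contents to a sibling temporary file and rename()s it over the original, which replaces the file atomically within a filesystem. A minimal userspace sketch of the same idiom (file and function names are made up):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

static int replace_atomically(const char *fname, const void *buf, size_t len)
{
	char tmp[4096];
	int fd;

	snprintf(tmp, sizeof(tmp), "%s.tmp", fname);
	fd = open(tmp, O_WRONLY | O_TRUNC | O_CREAT, 0644);
	if (fd < 0)
		return -1;
	if (write(fd, buf, len) != (ssize_t)len) {
		close(fd);
		unlink(tmp);
		return -1;
	}
	close(fd);
	/* rename() is atomic: readers see either the old file or the
	 * new one, never a half-written mix */
	return rename(tmp, fname);
}

int main(void)
{
	const char msg[] = "hello\n";

	return replace_atomically("demo.txt", msg, sizeof(msg) - 1);
}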
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 927db9f35ad6..696ccfa08d10 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -845,6 +845,8 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep)
size_t datalen = prep->datalen;
int ret = 0;
+ if (test_bit(KEY_FLAG_NEGATIVE, &key->flags))
+ return -ENOKEY;
if (datalen <= 0 || datalen > 32767 || !prep->data)
return -EINVAL;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index fb111eafcb89..1c3872aeed14 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -751,16 +751,16 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen)
/* the key is probably readable - now try to read it */
can_read_key:
- ret = key_validate(key);
- if (ret == 0) {
- ret = -EOPNOTSUPP;
- if (key->type->read) {
- /* read the data with the semaphore held (since we
- * might sleep) */
- down_read(&key->sem);
+ ret = -EOPNOTSUPP;
+ if (key->type->read) {
+ /* Read the data with the semaphore held (since we might sleep)
+ * to protect against the key being updated or revoked.
+ */
+ down_read(&key->sem);
+ ret = key_validate(key);
+ if (ret == 0)
ret = key->type->read(key, buffer, buflen);
- up_read(&key->sem);
- }
+ up_read(&key->sem);
}
error2:
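
The reordering above matters because the validity check and the read must observe a consistent key state: validating before taking key->sem leaves a window in which the key can be revoked or updated between the check and the copy. A generic userspace sketch of the same check-under-the-lock pattern using pthreads (the struct and all names are hypothetical; EKEYREVOKED is a Linux errno value):

#include <errno.h>
#include <pthread.h>
#include <string.h>

/* hypothetical object guarded by a rwlock, mimicking key->sem */
struct obj {
	pthread_rwlock_t lock;
	int revoked;
	char data[64];
};

static int obj_read(struct obj *o, char *buf, size_t len)
{
	int ret;

	pthread_rwlock_rdlock(&o->lock);
	/* validate under the same lock that protects the read, so a
	 * concurrent revoke cannot slip in between check and copy */
	if (o->revoked) {
		ret = -EKEYREVOKED;
	} else {
		strncpy(buf, o->data, len);
		ret = 0;
	}
	pthread_rwlock_unlock(&o->lock);
	return ret;
}

int main(void)
{
	struct obj o = { .revoked = 0 };
	char buf[64];

	pthread_rwlock_init(&o.lock, NULL);
	strcpy(o.data, "payload");
	return obj_read(&o, buf, sizeof(buf));
}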
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 903dace648a1..16dec53184b6 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -1007,13 +1007,16 @@ static void trusted_rcu_free(struct rcu_head *rcu)
*/
static int trusted_update(struct key *key, struct key_preparsed_payload *prep)
{
- struct trusted_key_payload *p = key->payload.data[0];
+ struct trusted_key_payload *p;
struct trusted_key_payload *new_p;
struct trusted_key_options *new_o;
size_t datalen = prep->datalen;
char *datablob;
int ret = 0;
+ if (test_bit(KEY_FLAG_NEGATIVE, &key->flags))
+ return -ENOKEY;
+ p = key->payload.data[0];
if (!p->migratable)
return -EPERM;
if (datalen <= 0 || datalen > 32767 || !prep->data)
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index 28cb30f80256..8705d79b2c6f 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -120,7 +120,10 @@ int user_update(struct key *key, struct key_preparsed_payload *prep)
if (ret == 0) {
/* attach the new data, displacing the old */
- zap = key->payload.data[0];
+ if (!test_bit(KEY_FLAG_NEGATIVE, &key->flags))
+ zap = key->payload.data[0];
+ else
+ zap = NULL;
rcu_assign_keypointer(key, upayload);
key->expiry = 0;
}
diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c
index 18643bf9894d..456e1a9bcfde 100644
--- a/security/selinux/ss/conditional.c
+++ b/security/selinux/ss/conditional.c
@@ -638,7 +638,7 @@ void cond_compute_av(struct avtab *ctab, struct avtab_key *key,
{
struct avtab_node *node;
- if (!ctab || !key || !avd || !xperms)
+ if (!ctab || !key || !avd)
return;
for (node = avtab_search_node(ctab, key); node;
@@ -657,7 +657,7 @@ void cond_compute_av(struct avtab *ctab, struct avtab_key *key,
if ((u16)(AVTAB_AUDITALLOW|AVTAB_ENABLED) ==
(node->key.specified & (AVTAB_AUDITALLOW|AVTAB_ENABLED)))
avd->auditallow |= node->datum.u.data;
- if ((node->key.specified & AVTAB_ENABLED) &&
+ if (xperms && (node->key.specified & AVTAB_ENABLED) &&
(node->key.specified & AVTAB_XPERMS))
services_compute_xperms_drivers(xperms, node);
}
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index ff81026f6ddb..37fdd5416a64 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1519,8 +1519,6 @@ static int smack_inode_getsecurity(const struct inode *inode,
* @inode: the object
* @buffer: where they go
* @buffer_size: size of buffer
- *
- * Returns 0 on success, -EINVAL otherwise
*/
static int smack_inode_listsecurity(struct inode *inode, char *buffer,
size_t buffer_size)
diff --git a/sound/firewire/dice/dice.c b/sound/firewire/dice/dice.c
index 5d99436dfcae..0cda05c72f50 100644
--- a/sound/firewire/dice/dice.c
+++ b/sound/firewire/dice/dice.c
@@ -12,9 +12,11 @@ MODULE_AUTHOR("Clemens Ladisch <clemens@ladisch.de>");
MODULE_LICENSE("GPL v2");
#define OUI_WEISS 0x001c6a
+#define OUI_LOUD 0x000ff2
#define DICE_CATEGORY_ID 0x04
#define WEISS_CATEGORY_ID 0x00
+#define LOUD_CATEGORY_ID 0x10
static int dice_interface_check(struct fw_unit *unit)
{
@@ -57,6 +59,8 @@ static int dice_interface_check(struct fw_unit *unit)
}
if (vendor == OUI_WEISS)
category = WEISS_CATEGORY_ID;
+ else if (vendor == OUI_LOUD)
+ category = LOUD_CATEGORY_ID;
else
category = DICE_CATEGORY_ID;
if (device->config_rom[3] != ((vendor << 8) | category) ||
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 8a7fbdcb4072..3b3658297070 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -312,6 +312,10 @@ enum {
(AZX_DCAPS_INTEL_PCH | AZX_DCAPS_SEPARATE_STREAM_TAG |\
AZX_DCAPS_I915_POWERWELL)
+#define AZX_DCAPS_INTEL_BROXTON \
+ (AZX_DCAPS_INTEL_PCH | AZX_DCAPS_SEPARATE_STREAM_TAG |\
+ AZX_DCAPS_I915_POWERWELL)
+
/* quirks for ATI SB / AMD Hudson */
#define AZX_DCAPS_PRESET_ATI_SB \
(AZX_DCAPS_NO_TCSEL | AZX_DCAPS_SYNC_WRITE | AZX_DCAPS_POSFIX_LPIB |\
@@ -351,6 +355,8 @@ enum {
((pci)->device == 0x0d0c) || \
((pci)->device == 0x160c))
+#define IS_BROXTON(pci) ((pci)->device == 0x5a98)
+
static char *driver_short_names[] = {
[AZX_DRIVER_ICH] = "HDA Intel",
[AZX_DRIVER_PCH] = "HDA Intel PCH",
@@ -502,15 +508,36 @@ static void azx_init_pci(struct azx *chip)
}
}
+/*
+ * In BXT-P A0, HD-Audio DMA requests are issued later than expected,
+ * which makes an audio stream sensitive to system latencies when
+ * 24/32-bit samples are playing.
+ * Adjust the DMA FIFO threshold to force DMA requests sooner and
+ * improve latency tolerance, at the expense of power.
+ */
+static void bxt_reduce_dma_latency(struct azx *chip)
+{
+ u32 val;
+
+ val = azx_readl(chip, SKL_EM4L);
+ val &= (0x3 << 20);
+ azx_writel(chip, SKL_EM4L, val);
+}
+
static void hda_intel_init_chip(struct azx *chip, bool full_reset)
{
struct hdac_bus *bus = azx_bus(chip);
+ struct pci_dev *pci = chip->pci;
if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
snd_hdac_set_codec_wakeup(bus, true);
azx_init_chip(chip, full_reset);
if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
snd_hdac_set_codec_wakeup(bus, false);
+
+ /* reduce dma latency to avoid noise */
+ if (IS_BROXTON(pci))
+ bxt_reduce_dma_latency(chip);
}
/* calculate runtime delay from LPIB */
@@ -927,6 +954,36 @@ static int azx_resume(struct device *dev)
}
#endif /* CONFIG_PM_SLEEP || SUPPORT_VGA_SWITCHEROO */
+#ifdef CONFIG_PM_SLEEP
+/* put codec down to D3 at hibernation for Intel SKL+;
+ * otherwise BIOS may still access the codec and screw up the driver
+ */
+#define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170)
+#define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70)
+#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci))
+
+static int azx_freeze_noirq(struct device *dev)
+{
+ struct pci_dev *pci = to_pci_dev(dev);
+
+ if (IS_SKL_PLUS(pci))
+ pci_set_power_state(pci, PCI_D3hot);
+
+ return 0;
+}
+
+static int azx_thaw_noirq(struct device *dev)
+{
+ struct pci_dev *pci = to_pci_dev(dev);
+
+ if (IS_SKL_PLUS(pci))
+ pci_set_power_state(pci, PCI_D0);
+
+ return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
#ifdef CONFIG_PM
static int azx_runtime_suspend(struct device *dev)
{
@@ -1036,6 +1093,10 @@ static int azx_runtime_idle(struct device *dev)
static const struct dev_pm_ops azx_pm = {
SET_SYSTEM_SLEEP_PM_OPS(azx_suspend, azx_resume)
+#ifdef CONFIG_PM_SLEEP
+ .freeze_noirq = azx_freeze_noirq,
+ .thaw_noirq = azx_thaw_noirq,
+#endif
SET_RUNTIME_PM_OPS(azx_runtime_suspend, azx_runtime_resume, azx_runtime_idle)
};
@@ -2124,6 +2185,9 @@ static const struct pci_device_id azx_ids[] = {
/* Sunrise Point-LP */
{ PCI_DEVICE(0x8086, 0x9d70),
.driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
+ /* Broxton-P(Apollolake) */
+ { PCI_DEVICE(0x8086, 0x5a98),
+ .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
/* Haswell */
{ PCI_DEVICE(0x8086, 0x0a0c),
.driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL },
diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index f8a12ca477f1..4ef2259f88ca 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -778,7 +778,8 @@ static const struct hda_pintbl alienware_pincfgs[] = {
};
static const struct snd_pci_quirk ca0132_quirks[] = {
- SND_PCI_QUIRK(0x1028, 0x0685, "Alienware 15", QUIRK_ALIENWARE),
+ SND_PCI_QUIRK(0x1028, 0x0685, "Alienware 15 2015", QUIRK_ALIENWARE),
+ SND_PCI_QUIRK(0x1028, 0x0688, "Alienware 17 2015", QUIRK_ALIENWARE),
{}
};
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index c8b8ef5246a6..ef198903c0c3 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -955,6 +955,7 @@ static int patch_conexant_auto(struct hda_codec *codec)
*/
static const struct hda_device_id snd_hda_id_conexant[] = {
+ HDA_CODEC_ENTRY(0x14f12008, "CX8200", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f15045, "CX20549 (Venice)", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f15047, "CX20551 (Waikiki)", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f15051, "CX20561 (Hermosa)", patch_conexant_auto),
@@ -972,9 +973,9 @@ static const struct hda_device_id snd_hda_id_conexant[] = {
HDA_CODEC_ENTRY(0x14f150ac, "CX20652", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f150b8, "CX20664", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f150b9, "CX20665", patch_conexant_auto),
- HDA_CODEC_ENTRY(0x14f150f1, "CX20721", patch_conexant_auto),
+ HDA_CODEC_ENTRY(0x14f150f1, "CX21722", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f150f2, "CX20722", patch_conexant_auto),
- HDA_CODEC_ENTRY(0x14f150f3, "CX20723", patch_conexant_auto),
+ HDA_CODEC_ENTRY(0x14f150f3, "CX21724", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f150f4, "CX20724", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f1510f, "CX20751/2", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f15110, "CX20751/2", patch_conexant_auto),
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 60cd9e700909..4b6fb668c91c 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -2352,6 +2352,12 @@ static void intel_pin_eld_notify(void *audio_ptr, int port)
struct hda_codec *codec = audio_ptr;
int pin_nid = port + 0x04;
+ /* skip notification during system suspend (but not in runtime PM);
+ * the state will be updated at resume
+ */
+ if (snd_power_get_state(codec->card) != SNDRV_CTL_POWER_D0)
+ return;
+
check_presence_and_report(codec, pin_nid);
}
@@ -2378,7 +2384,8 @@ static int patch_generic_hdmi(struct hda_codec *codec)
* can cover the codec power request, and so need not set this flag.
* For previous platforms, there is no such power well feature.
*/
- if (is_valleyview_plus(codec) || is_skylake(codec))
+ if (is_valleyview_plus(codec) || is_skylake(codec) ||
+ is_broxton(codec))
codec->core.link_power_control = 1;
if (is_haswell_plus(codec) || is_valleyview_plus(codec)) {
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 2f7b065f9ac4..3a89d82f8057 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -67,6 +67,10 @@ enum {
ALC_HEADSET_TYPE_OMTP,
};
+enum {
+ ALC_KEY_MICMUTE_INDEX,
+};
+
struct alc_customize_define {
unsigned int sku_cfg;
unsigned char port_connectivity;
@@ -111,6 +115,7 @@ struct alc_spec {
void (*power_hook)(struct hda_codec *codec);
#endif
void (*shutup)(struct hda_codec *codec);
+ void (*reboot_notify)(struct hda_codec *codec);
int init_amp;
int codec_variant; /* flag for other variants */
@@ -122,6 +127,7 @@ struct alc_spec {
unsigned int pll_coef_idx, pll_coef_bit;
unsigned int coef0;
struct input_dev *kb_dev;
+ u8 alc_mute_keycode_map[1];
};
/*
@@ -773,6 +779,25 @@ static inline void alc_shutup(struct hda_codec *codec)
snd_hda_shutup_pins(codec);
}
+static void alc_reboot_notify(struct hda_codec *codec)
+{
+ struct alc_spec *spec = codec->spec;
+
+ if (spec && spec->reboot_notify)
+ spec->reboot_notify(codec);
+ else
+ alc_shutup(codec);
+}
+
+/* power down codec to D3 at reboot/shutdown; set as reboot_notify ops */
+static void alc_d3_at_reboot(struct hda_codec *codec)
+{
+ snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3);
+ snd_hda_codec_write(codec, codec->core.afg, 0,
+ AC_VERB_SET_POWER_STATE, AC_PWRST_D3);
+ msleep(10);
+}
+
#define alc_free snd_hda_gen_free
#ifdef CONFIG_PM
@@ -818,7 +843,7 @@ static const struct hda_codec_ops alc_patch_ops = {
.suspend = alc_suspend,
.check_power_status = snd_hda_gen_check_power_status,
#endif
- .reboot_notify = alc_shutup,
+ .reboot_notify = alc_reboot_notify,
};
@@ -1755,10 +1780,12 @@ enum {
ALC889_FIXUP_MBA11_VREF,
ALC889_FIXUP_MBA21_VREF,
ALC889_FIXUP_MP11_VREF,
+ ALC889_FIXUP_MP41_VREF,
ALC882_FIXUP_INV_DMIC,
ALC882_FIXUP_NO_PRIMARY_HP,
ALC887_FIXUP_ASUS_BASS,
ALC887_FIXUP_BASS_CHMAP,
+ ALC882_FIXUP_DISABLE_AAMIX,
};
static void alc889_fixup_coef(struct hda_codec *codec,
@@ -1842,7 +1869,7 @@ static void alc889_fixup_mbp_vref(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
{
struct alc_spec *spec = codec->spec;
- static hda_nid_t nids[2] = { 0x14, 0x15 };
+ static hda_nid_t nids[3] = { 0x14, 0x15, 0x19 };
int i;
if (action != HDA_FIXUP_ACT_INIT)
@@ -1920,6 +1947,8 @@ static void alc882_fixup_no_primary_hp(struct hda_codec *codec,
static void alc_fixup_bass_chmap(struct hda_codec *codec,
const struct hda_fixup *fix, int action);
+static void alc_fixup_disable_aamix(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action);
static const struct hda_fixup alc882_fixups[] = {
[ALC882_FIXUP_ABIT_AW9D_MAX] = {
@@ -2130,6 +2159,12 @@ static const struct hda_fixup alc882_fixups[] = {
.chained = true,
.chain_id = ALC885_FIXUP_MACPRO_GPIO,
},
+ [ALC889_FIXUP_MP41_VREF] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc889_fixup_mbp_vref,
+ .chained = true,
+ .chain_id = ALC885_FIXUP_MACPRO_GPIO,
+ },
[ALC882_FIXUP_INV_DMIC] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc_fixup_inv_dmic,
@@ -2151,6 +2186,10 @@ static const struct hda_fixup alc882_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc_fixup_bass_chmap,
},
+ [ALC882_FIXUP_DISABLE_AAMIX] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_disable_aamix,
+ },
};
static const struct snd_pci_quirk alc882_fixup_tbl[] = {
@@ -2208,7 +2247,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
SND_PCI_QUIRK(0x106b, 0x3f00, "Macbook 5,1", ALC889_FIXUP_IMAC91_VREF),
SND_PCI_QUIRK(0x106b, 0x4000, "MacbookPro 5,1", ALC889_FIXUP_IMAC91_VREF),
SND_PCI_QUIRK(0x106b, 0x4100, "Macmini 3,1", ALC889_FIXUP_IMAC91_VREF),
- SND_PCI_QUIRK(0x106b, 0x4200, "Mac Pro 5,1", ALC885_FIXUP_MACPRO_GPIO),
+ SND_PCI_QUIRK(0x106b, 0x4200, "Mac Pro 4,1/5,1", ALC889_FIXUP_MP41_VREF),
SND_PCI_QUIRK(0x106b, 0x4300, "iMac 9,1", ALC889_FIXUP_IMAC91_VREF),
SND_PCI_QUIRK(0x106b, 0x4600, "MacbookPro 5,2", ALC889_FIXUP_IMAC91_VREF),
SND_PCI_QUIRK(0x106b, 0x4900, "iMac 9,1 Aluminum", ALC889_FIXUP_IMAC91_VREF),
@@ -2218,6 +2257,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD),
SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1458, 0xa182, "Gigabyte Z170X-UD3", ALC882_FIXUP_DISABLE_AAMIX),
SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
@@ -3427,12 +3467,43 @@ static void gpio2_mic_hotkey_event(struct hda_codec *codec,
/* GPIO2 just toggles on a keypress/keyrelease cycle. Therefore
send both key on and key off events for every interrupt. */
- input_report_key(spec->kb_dev, KEY_MICMUTE, 1);
+ input_report_key(spec->kb_dev, spec->alc_mute_keycode_map[ALC_KEY_MICMUTE_INDEX], 1);
input_sync(spec->kb_dev);
- input_report_key(spec->kb_dev, KEY_MICMUTE, 0);
+ input_report_key(spec->kb_dev, spec->alc_mute_keycode_map[ALC_KEY_MICMUTE_INDEX], 0);
input_sync(spec->kb_dev);
}
+static int alc_register_micmute_input_device(struct hda_codec *codec)
+{
+ struct alc_spec *spec = codec->spec;
+ int i;
+
+ spec->kb_dev = input_allocate_device();
+ if (!spec->kb_dev) {
+ codec_err(codec, "Out of memory (input_allocate_device)\n");
+ return -ENOMEM;
+ }
+
+ spec->alc_mute_keycode_map[ALC_KEY_MICMUTE_INDEX] = KEY_MICMUTE;
+
+ spec->kb_dev->name = "Microphone Mute Button";
+ spec->kb_dev->evbit[0] = BIT_MASK(EV_KEY);
+ spec->kb_dev->keycodesize = sizeof(spec->alc_mute_keycode_map[0]);
+ spec->kb_dev->keycodemax = ARRAY_SIZE(spec->alc_mute_keycode_map);
+ spec->kb_dev->keycode = spec->alc_mute_keycode_map;
+ for (i = 0; i < ARRAY_SIZE(spec->alc_mute_keycode_map); i++)
+ set_bit(spec->alc_mute_keycode_map[i], spec->kb_dev->keybit);
+
+ if (input_register_device(spec->kb_dev)) {
+ codec_err(codec, "input_register_device failed\n");
+ input_free_device(spec->kb_dev);
+ spec->kb_dev = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static void alc280_fixup_hp_gpio2_mic_hotkey(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
{
@@ -3450,20 +3521,8 @@ static void alc280_fixup_hp_gpio2_mic_hotkey(struct hda_codec *codec,
struct alc_spec *spec = codec->spec;
if (action == HDA_FIXUP_ACT_PRE_PROBE) {
- spec->kb_dev = input_allocate_device();
- if (!spec->kb_dev) {
- codec_err(codec, "Out of memory (input_allocate_device)\n");
- return;
- }
- spec->kb_dev->name = "Microphone Mute Button";
- spec->kb_dev->evbit[0] = BIT_MASK(EV_KEY);
- spec->kb_dev->keybit[BIT_WORD(KEY_MICMUTE)] = BIT_MASK(KEY_MICMUTE);
- if (input_register_device(spec->kb_dev)) {
- codec_err(codec, "input_register_device failed\n");
- input_free_device(spec->kb_dev);
- spec->kb_dev = NULL;
+ if (alc_register_micmute_input_device(codec) != 0)
return;
- }
snd_hda_add_verbs(codec, gpio_init);
snd_hda_codec_write_cache(codec, codec->core.afg, 0,
@@ -3493,6 +3552,47 @@ static void alc280_fixup_hp_gpio2_mic_hotkey(struct hda_codec *codec,
}
}
+static void alc233_fixup_lenovo_line2_mic_hotkey(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+{
+ /* Line2 = mic mute hotkey
+ GPIO2 = mic mute LED */
+ static const struct hda_verb gpio_init[] = {
+ { 0x01, AC_VERB_SET_GPIO_MASK, 0x04 },
+ { 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x04 },
+ {}
+ };
+
+ struct alc_spec *spec = codec->spec;
+
+ if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+ if (alc_register_micmute_input_device(codec) != 0)
+ return;
+
+ snd_hda_add_verbs(codec, gpio_init);
+ snd_hda_jack_detect_enable_callback(codec, 0x1b,
+ gpio2_mic_hotkey_event);
+
+ spec->gen.cap_sync_hook = alc_fixup_gpio_mic_mute_hook;
+ spec->gpio_led = 0;
+ spec->mute_led_polarity = 0;
+ spec->gpio_mic_led_mask = 0x04;
+ return;
+ }
+
+ if (!spec->kb_dev)
+ return;
+
+ switch (action) {
+ case HDA_FIXUP_ACT_PROBE:
+ spec->init_amp = ALC_INIT_DEFAULT;
+ break;
+ case HDA_FIXUP_ACT_FREE:
+ input_unregister_device(spec->kb_dev);
+ spec->kb_dev = NULL;
+ }
+}
+
static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
{
@@ -4190,6 +4290,8 @@ static void alc_fixup_tpt440_dock(struct hda_codec *codec,
struct alc_spec *spec = codec->spec;
if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+ spec->shutup = alc_no_shutup; /* reduce click noise */
+ spec->reboot_notify = alc_d3_at_reboot; /* reduce noise */
spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP;
codec->power_save_node = 0; /* avoid click noises */
snd_hda_apply_pincfgs(codec, pincfgs);
@@ -4570,6 +4672,7 @@ enum {
ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC,
ALC293_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC292_FIXUP_TPT440_DOCK,
+ ALC292_FIXUP_TPT440,
ALC283_FIXUP_BXBT2807_MIC,
ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED,
ALC282_FIXUP_ASPIRE_V5_PINS,
@@ -4585,8 +4688,12 @@ enum {
ALC288_FIXUP_DISABLE_AAMIX,
ALC292_FIXUP_DELL_E7X,
ALC292_FIXUP_DISABLE_AAMIX,
+ ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK,
ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC275_FIXUP_DELL_XPS,
+ ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
+ ALC293_FIXUP_LENOVO_SPK_NOISE,
+ ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY,
};
static const struct hda_fixup alc269_fixups[] = {
@@ -5041,6 +5148,12 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC269_FIXUP_LIMIT_INT_MIC_BOOST
},
+ [ALC292_FIXUP_TPT440] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_disable_aamix,
+ .chained = true,
+ .chain_id = ALC292_FIXUP_TPT440_DOCK,
+ },
[ALC283_FIXUP_BXBT2807_MIC] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
@@ -5140,6 +5253,12 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE
},
+ [ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_disable_aamix,
+ .chained = true,
+ .chain_id = ALC293_FIXUP_DELL1_MIC_NO_PRESENCE
+ },
[ALC292_FIXUP_DELL_E7X] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc_fixup_dell_xps13,
@@ -5167,6 +5286,27 @@ static const struct hda_fixup alc269_fixups[] = {
{}
}
},
+ [ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE] = {
+ .type = HDA_FIXUP_VERBS,
+ .v.verbs = (const struct hda_verb[]) {
+ /* Disable pass-through path for FRONT 14h */
+ {0x20, AC_VERB_SET_COEF_INDEX, 0x36},
+ {0x20, AC_VERB_SET_PROC_COEF, 0x1737},
+ {}
+ },
+ .chained = true,
+ .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
+ },
+ [ALC293_FIXUP_LENOVO_SPK_NOISE] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_disable_aamix,
+ .chained = true,
+ .chain_id = ALC269_FIXUP_THINKPAD_ACPI
+ },
+ [ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc233_fixup_lenovo_line2_mic_hotkey,
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5180,8 +5320,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK),
SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS),
+ SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK),
SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
+ SND_PCI_QUIRK(0x1028, 0x05bd, "Dell Latitude E6440", ALC292_FIXUP_DELL_E7X),
SND_PCI_QUIRK(0x1028, 0x05ca, "Dell Latitude E7240", ALC292_FIXUP_DELL_E7X),
SND_PCI_QUIRK(0x1028, 0x05cb, "Dell Latitude E7440", ALC292_FIXUP_DELL_E7X),
SND_PCI_QUIRK(0x1028, 0x05da, "Dell Vostro 5460", ALC290_FIXUP_SUBWOOFER),
@@ -5199,11 +5341,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
- SND_PCI_QUIRK(0x1028, 0x06db, "Dell", ALC292_FIXUP_DISABLE_AAMIX),
- SND_PCI_QUIRK(0x1028, 0x06dd, "Dell", ALC292_FIXUP_DISABLE_AAMIX),
- SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC292_FIXUP_DISABLE_AAMIX),
- SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC292_FIXUP_DISABLE_AAMIX),
- SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC292_FIXUP_DISABLE_AAMIX),
+ SND_PCI_QUIRK(0x1028, 0x06db, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
+ SND_PCI_QUIRK(0x1028, 0x06dd, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
+ SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
+ SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
+ SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
+ SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -5302,15 +5445,18 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x21fb, "Thinkpad T430s", ALC269_FIXUP_LENOVO_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2203, "Thinkpad X230 Tablet", ALC269_FIXUP_LENOVO_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2208, "Thinkpad T431s", ALC269_FIXUP_LENOVO_DOCK),
- SND_PCI_QUIRK(0x17aa, 0x220c, "Thinkpad T440s", ALC292_FIXUP_TPT440_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x220c, "Thinkpad T440s", ALC292_FIXUP_TPT440),
SND_PCI_QUIRK(0x17aa, 0x220e, "Thinkpad T440p", ALC292_FIXUP_TPT440_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2210, "Thinkpad T540p", ALC292_FIXUP_TPT440_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2211, "Thinkpad W541", ALC292_FIXUP_TPT440_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2212, "Thinkpad T440", ALC292_FIXUP_TPT440_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2214, "Thinkpad X240", ALC292_FIXUP_TPT440_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2215, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+ SND_PCI_QUIRK(0x17aa, 0x2218, "Thinkpad X1 Carbon 2nd", ALC292_FIXUP_TPT440_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE),
+ SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP),
SND_PCI_QUIRK(0x17aa, 0x5013, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -5320,6 +5466,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x5034, "Thinkpad T450", ALC292_FIXUP_TPT440_DOCK),
SND_PCI_QUIRK(0x17aa, 0x5036, "Thinkpad T450s", ALC292_FIXUP_TPT440_DOCK),
SND_PCI_QUIRK(0x17aa, 0x503c, "Thinkpad L450", ALC292_FIXUP_TPT440_DOCK),
+ SND_PCI_QUIRK(0x17aa, 0x504b, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE),
SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K),
SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
@@ -5400,6 +5547,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
{.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"},
{.id = ALC283_FIXUP_SENSE_COMBO_JACK, .name = "alc283-sense-combo"},
{.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"},
+ {.id = ALC292_FIXUP_TPT440, .name = "tpt440"},
{}
};
@@ -6386,6 +6534,7 @@ static const struct hda_fixup alc662_fixups[] = {
static const struct snd_pci_quirk alc662_fixup_tbl[] = {
SND_PCI_QUIRK(0x1019, 0x9087, "ECS", ALC662_FIXUP_ASUS_MODE2),
SND_PCI_QUIRK(0x1025, 0x022f, "Acer Aspire One", ALC662_FIXUP_INV_DMIC),
+ SND_PCI_QUIRK(0x1025, 0x0241, "Packard Bell DOTS", ALC662_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x1025, 0x0308, "Acer Aspire 8942G", ALC662_FIXUP_ASPIRE),
SND_PCI_QUIRK(0x1025, 0x031c, "Gateway NV79", ALC662_FIXUP_SKU_IGNORE),
SND_PCI_QUIRK(0x1025, 0x0349, "eMachines eM250", ALC662_FIXUP_INV_DMIC),
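The new fixup entries above (ALC292_FIXUP_TPT440, ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK, the Lenovo noise fixups) all rely on chaining: a fixup applies its own func/verbs/pins, then hands control to chain_id. A minimal standalone sketch of such a walker, with simplified types (the real one lives in sound/pci/hda/hda_auto_parser.c):

#include <stdio.h>

struct fixup {
	void (*func)(void);
	int chained;
	int chain_id;
};

static void disable_aamix(void) { puts("disable aamix"); }
static void dock_setup(void)    { puts("dock pin setup"); }

static void apply_fixup_chain(const struct fixup *tbl, int id)
{
	while (id >= 0) {
		const struct fixup *f = &tbl[id];

		if (f->func)
			f->func();		/* apply this fixup */
		if (!f->chained)
			break;
		id = f->chain_id;		/* then follow the chain */
	}
}

int main(void)
{
	/* 0 plays the role of ALC292_FIXUP_TPT440, 1 of ..._TPT440_DOCK */
	const struct fixup tbl[] = {
		{ disable_aamix, 1, 1 },
		{ dock_setup,    0, -1 },
	};

	apply_fixup_chain(tbl, 0);
	return 0;
}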
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 826122d8acee..2c7c5eb8b1e9 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -3110,6 +3110,29 @@ static void stac92hd71bxx_fixup_hp_hdx(struct hda_codec *codec,
spec->gpio_led = 0x08;
}
+static bool is_hp_output(struct hda_codec *codec, hda_nid_t pin)
+{
+ unsigned int pin_cfg = snd_hda_codec_get_pincfg(codec, pin);
+
+ /* count line-out, too, as the BIOS often sets it so */
+ return get_defcfg_connect(pin_cfg) != AC_JACK_PORT_NONE &&
+ (get_defcfg_device(pin_cfg) == AC_JACK_LINE_OUT ||
+ get_defcfg_device(pin_cfg) == AC_JACK_HP_OUT);
+}
+
+static void fixup_hp_headphone(struct hda_codec *codec, hda_nid_t pin)
+{
+ unsigned int pin_cfg = snd_hda_codec_get_pincfg(codec, pin);
+
+ /* It was changed in the BIOS to just satisfy MS DTM.
+ * Let's turn it back into a slaved HP
+ */
+ pin_cfg = (pin_cfg & (~AC_DEFCFG_DEVICE)) |
+ (AC_JACK_HP_OUT << AC_DEFCFG_DEVICE_SHIFT);
+ pin_cfg = (pin_cfg & (~(AC_DEFCFG_DEF_ASSOC | AC_DEFCFG_SEQUENCE))) |
+ 0x1f;
+ snd_hda_codec_set_pincfg(codec, pin, pin_cfg);
+}
static void stac92hd71bxx_fixup_hp(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
@@ -3119,22 +3142,12 @@ static void stac92hd71bxx_fixup_hp(struct hda_codec *codec,
if (action != HDA_FIXUP_ACT_PRE_PROBE)
return;
- if (hp_blike_system(codec->core.subsystem_id)) {
- unsigned int pin_cfg = snd_hda_codec_get_pincfg(codec, 0x0f);
- if (get_defcfg_device(pin_cfg) == AC_JACK_LINE_OUT ||
- get_defcfg_device(pin_cfg) == AC_JACK_SPEAKER ||
- get_defcfg_device(pin_cfg) == AC_JACK_HP_OUT) {
- /* It was changed in the BIOS to just satisfy MS DTM.
- * Lets turn it back into slaved HP
- */
- pin_cfg = (pin_cfg & (~AC_DEFCFG_DEVICE))
- | (AC_JACK_HP_OUT <<
- AC_DEFCFG_DEVICE_SHIFT);
- pin_cfg = (pin_cfg & (~(AC_DEFCFG_DEF_ASSOC
- | AC_DEFCFG_SEQUENCE)))
- | 0x1f;
- snd_hda_codec_set_pincfg(codec, 0x0f, pin_cfg);
- }
+ /* when both output A and F are assigned, these are supposedly
+ * dock and built-in headphones; fix both pin configs
+ */
+ if (is_hp_output(codec, 0x0a) && is_hp_output(codec, 0x0f)) {
+ fixup_hp_headphone(codec, 0x0a);
+ fixup_hp_headphone(codec, 0x0f);
}
if (find_mute_led_cfg(codec, 1))
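fixup_hp_headphone() above rewrites three bit-fields of a pin default config in one go. A standalone illustration of the same arithmetic, using the field layout from sound/pci/hda/hda_codec.h; the sample config value is made up:

#include <stdio.h>

#define AC_DEFCFG_DEVICE	(0xf << 20)
#define AC_DEFCFG_DEVICE_SHIFT	20
#define AC_JACK_HP_OUT		0x2
#define AC_DEFCFG_DEF_ASSOC	(0xf << 4)
#define AC_DEFCFG_SEQUENCE	0xf

int main(void)
{
	unsigned int cfg = 0x01014010;	/* hypothetical pin config */

	/* switch the device field to headphone out */
	cfg = (cfg & ~AC_DEFCFG_DEVICE) |
	      (AC_JACK_HP_OUT << AC_DEFCFG_DEVICE_SHIFT);
	/* force association/sequence to 0x1f (slaved output) */
	cfg = (cfg & ~(AC_DEFCFG_DEF_ASSOC | AC_DEFCFG_SEQUENCE)) | 0x1f;
	printf("0x%08x\n", cfg);	/* prints 0x0121401f */
	return 0;
}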
diff --git a/sound/pci/rme96.c b/sound/pci/rme96.c
index 714df906249e..41c31db65039 100644
--- a/sound/pci/rme96.c
+++ b/sound/pci/rme96.c
@@ -741,10 +741,11 @@ snd_rme96_playback_setrate(struct rme96 *rme96,
{
/* change to/from double-speed: reset the DAC (if available) */
snd_rme96_reset_dac(rme96);
+ return 1; /* need to restore volume */
} else {
writel(rme96->wcreg, rme96->iobase + RME96_IO_CONTROL_REGISTER);
+ return 0;
}
- return 0;
}
static int
@@ -980,6 +981,7 @@ snd_rme96_playback_hw_params(struct snd_pcm_substream *substream,
struct rme96 *rme96 = snd_pcm_substream_chip(substream);
struct snd_pcm_runtime *runtime = substream->runtime;
int err, rate, dummy;
+ bool apply_dac_volume = false;
runtime->dma_area = (void __force *)(rme96->iobase +
RME96_IO_PLAY_BUFFER);
@@ -993,24 +995,26 @@ snd_rme96_playback_hw_params(struct snd_pcm_substream *substream,
{
/* slave clock */
if ((int)params_rate(params) != rate) {
- spin_unlock_irq(&rme96->lock);
- return -EIO;
- }
- } else if ((err = snd_rme96_playback_setrate(rme96, params_rate(params))) < 0) {
- spin_unlock_irq(&rme96->lock);
- return err;
- }
- if ((err = snd_rme96_playback_setformat(rme96, params_format(params))) < 0) {
- spin_unlock_irq(&rme96->lock);
- return err;
+ err = -EIO;
+ goto error;
+ }
+ } else {
+ err = snd_rme96_playback_setrate(rme96, params_rate(params));
+ if (err < 0)
+ goto error;
+ apply_dac_volume = err > 0; /* need to restore volume later? */
}
+
+ err = snd_rme96_playback_setformat(rme96, params_format(params));
+ if (err < 0)
+ goto error;
snd_rme96_setframelog(rme96, params_channels(params), 1);
if (rme96->capture_periodsize != 0) {
if (params_period_size(params) << rme96->playback_frlog !=
rme96->capture_periodsize)
{
- spin_unlock_irq(&rme96->lock);
- return -EBUSY;
+ err = -EBUSY;
+ goto error;
}
}
rme96->playback_periodsize =
@@ -1021,9 +1025,16 @@ snd_rme96_playback_hw_params(struct snd_pcm_substream *substream,
rme96->wcreg &= ~(RME96_WCR_PRO | RME96_WCR_DOLBY | RME96_WCR_EMP);
writel(rme96->wcreg |= rme96->wcreg_spdif_stream, rme96->iobase + RME96_IO_CONTROL_REGISTER);
}
+
+ err = 0;
+ error:
spin_unlock_irq(&rme96->lock);
-
- return 0;
+ if (apply_dac_volume) {
+ usleep_range(3000, 10000);
+ snd_rme96_apply_dac_volume(rme96);
+ }
+
+ return err;
}
static int
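The rme96 rework above replaces five separate unlock-and-return sites with one error label, so the spinlock is dropped exactly once and the DAC volume restore can run after the lock is released. A runnable userspace analogue of the pattern, with hypothetical names:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static int configure(int rate)
{
	int err = 0;

	pthread_mutex_lock(&lock);
	if (rate <= 0) {
		err = -1;
		goto out;		/* every failure funnels here */
	}
	/* ... further steps, each "goto out" on failure ... */
out:
	pthread_mutex_unlock(&lock);	/* single unlock point */
	return err;
}

int main(void)
{
	printf("%d %d\n", configure(44100), configure(0));
	return 0;
}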
diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c
index 9929efc6b9aa..93b400800905 100644
--- a/sound/soc/codecs/arizona.c
+++ b/sound/soc/codecs/arizona.c
@@ -1023,24 +1023,18 @@ void arizona_init_dvfs(struct arizona_priv *priv)
}
EXPORT_SYMBOL_GPL(arizona_init_dvfs);
-static unsigned int arizona_sysclk_48k_rates[] = {
+static unsigned int arizona_opclk_ref_48k_rates[] = {
6144000,
12288000,
24576000,
49152000,
- 73728000,
- 98304000,
- 147456000,
};
-static unsigned int arizona_sysclk_44k1_rates[] = {
+static unsigned int arizona_opclk_ref_44k1_rates[] = {
5644800,
11289600,
22579200,
45158400,
- 67737600,
- 90316800,
- 135475200,
};
static int arizona_set_opclk(struct snd_soc_codec *codec, unsigned int clk,
@@ -1065,11 +1059,11 @@ static int arizona_set_opclk(struct snd_soc_codec *codec, unsigned int clk,
}
if (refclk % 8000)
- rates = arizona_sysclk_44k1_rates;
+ rates = arizona_opclk_ref_44k1_rates;
else
- rates = arizona_sysclk_48k_rates;
+ rates = arizona_opclk_ref_48k_rates;
- for (ref = 0; ref < ARRAY_SIZE(arizona_sysclk_48k_rates) &&
+ for (ref = 0; ref < ARRAY_SIZE(arizona_opclk_ref_48k_rates) &&
rates[ref] <= refclk; ref++) {
div = 1;
while (rates[ref] / div >= freq && div < 32) {
@@ -1543,7 +1537,7 @@ static int arizona_hw_params(struct snd_pcm_substream *substream,
bool reconfig;
unsigned int aif_tx_state, aif_rx_state;
- if (params_rate(params) % 8000)
+ if (params_rate(params) % 4000)
rates = &arizona_44k1_bclk_rates[0];
else
rates = &arizona_48k_bclk_rates[0];
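The % 8000 to % 4000 change above fixes rates that are multiples of 4 kHz but not of 8 kHz (12 kHz, for example), which the old test misfiled into the 44.1 kHz BCLK table. Quick standalone check:

#include <stdio.h>

int main(void)
{
	const int rates[] = { 8000, 11025, 12000, 22050, 24000,
			      44100, 48000, 88200, 96000 };

	for (unsigned i = 0; i < sizeof(rates) / sizeof(rates[0]); i++)
		printf("%6d -> %s family\n", rates[i],
		       rates[i] % 4000 ? "44.1k" : "48k");
	return 0;
}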
diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c
index 969e337dc17c..afa6c5db9dcc 100644
--- a/sound/soc/codecs/es8328.c
+++ b/sound/soc/codecs/es8328.c
@@ -85,7 +85,15 @@ static const DECLARE_TLV_DB_SCALE(pga_tlv, 0, 300, 0);
static const DECLARE_TLV_DB_SCALE(bypass_tlv, -1500, 300, 0);
static const DECLARE_TLV_DB_SCALE(mic_tlv, 0, 300, 0);
-static const int deemph_settings[] = { 0, 32000, 44100, 48000 };
+static const struct {
+ int rate;
+ unsigned int val;
+} deemph_settings[] = {
+ { 0, ES8328_DACCONTROL6_DEEMPH_OFF },
+ { 32000, ES8328_DACCONTROL6_DEEMPH_32k },
+ { 44100, ES8328_DACCONTROL6_DEEMPH_44_1k },
+ { 48000, ES8328_DACCONTROL6_DEEMPH_48k },
+};
static int es8328_set_deemph(struct snd_soc_codec *codec)
{
@@ -97,21 +105,22 @@ static int es8328_set_deemph(struct snd_soc_codec *codec)
* rate.
*/
if (es8328->deemph) {
- best = 1;
- for (i = 2; i < ARRAY_SIZE(deemph_settings); i++) {
- if (abs(deemph_settings[i] - es8328->playback_fs) <
- abs(deemph_settings[best] - es8328->playback_fs))
+ best = 0;
+ for (i = 1; i < ARRAY_SIZE(deemph_settings); i++) {
+ if (abs(deemph_settings[i].rate - es8328->playback_fs) <
+ abs(deemph_settings[best].rate - es8328->playback_fs))
best = i;
}
- val = best << 1;
+ val = deemph_settings[best].val;
} else {
- val = 0;
+ val = ES8328_DACCONTROL6_DEEMPH_OFF;
}
dev_dbg(codec->dev, "Set deemphasis %d\n", val);
- return snd_soc_update_bits(codec, ES8328_DACCONTROL6, 0x6, val);
+ return snd_soc_update_bits(codec, ES8328_DACCONTROL6,
+ ES8328_DACCONTROL6_DEEMPH_MASK, val);
}
static int es8328_get_deemph(struct snd_kcontrol *kcontrol,
@@ -205,18 +214,18 @@ static const struct snd_kcontrol_new es8328_right_line_controls =
/* Left Mixer */
static const struct snd_kcontrol_new es8328_left_mixer_controls[] = {
- SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL17, 8, 1, 0),
- SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL17, 7, 1, 0),
- SOC_DAPM_SINGLE("Right Playback Switch", ES8328_DACCONTROL18, 8, 1, 0),
- SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL18, 7, 1, 0),
+ SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL17, 7, 1, 0),
+ SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL17, 6, 1, 0),
+ SOC_DAPM_SINGLE("Right Playback Switch", ES8328_DACCONTROL18, 7, 1, 0),
+ SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL18, 6, 1, 0),
};
/* Right Mixer */
static const struct snd_kcontrol_new es8328_right_mixer_controls[] = {
- SOC_DAPM_SINGLE("Left Playback Switch", ES8328_DACCONTROL19, 8, 1, 0),
- SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL19, 7, 1, 0),
- SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL20, 8, 1, 0),
- SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL20, 7, 1, 0),
+ SOC_DAPM_SINGLE("Left Playback Switch", ES8328_DACCONTROL19, 7, 1, 0),
+ SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL19, 6, 1, 0),
+ SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL20, 7, 1, 0),
+ SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL20, 6, 1, 0),
};
static const char * const es8328_pga_sel[] = {
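The de-emphasis rework above pairs each rate with its real register value and widens the update mask to the actual DEEMPH field (bits 7:6); the old code wrote best << 1 into bits 2:1. A standalone check of the nearest-rate search for a 32 kHz stream, using the same table values:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	static const struct { int rate; unsigned val; } tbl[] = {
		{ 0, 0u << 6 }, { 32000, 1u << 6 },
		{ 44100, 2u << 6 }, { 48000, 3u << 6 },
	};
	int fs = 32000, best = 0;

	for (unsigned i = 1; i < sizeof(tbl) / sizeof(tbl[0]); i++)
		if (abs(tbl[i].rate - fs) < abs(tbl[best].rate - fs))
			best = i;
	printf("rate %d -> reg value 0x%02x\n", fs, tbl[best].val);
	return 0;
}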
diff --git a/sound/soc/codecs/es8328.h b/sound/soc/codecs/es8328.h
index cb36afe10c0e..156c748c89c7 100644
--- a/sound/soc/codecs/es8328.h
+++ b/sound/soc/codecs/es8328.h
@@ -153,6 +153,7 @@ int es8328_probe(struct device *dev, struct regmap *regmap);
#define ES8328_DACCONTROL6_CLICKFREE (1 << 3)
#define ES8328_DACCONTROL6_DAC_INVR (1 << 4)
#define ES8328_DACCONTROL6_DAC_INVL (1 << 5)
+#define ES8328_DACCONTROL6_DEEMPH_MASK (3 << 6)
#define ES8328_DACCONTROL6_DEEMPH_OFF (0 << 6)
#define ES8328_DACCONTROL6_DEEMPH_32k (1 << 6)
#define ES8328_DACCONTROL6_DEEMPH_44_1k (2 << 6)
diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c
index 7fc7b4e3f444..c1b87c5800b1 100644
--- a/sound/soc/codecs/nau8825.c
+++ b/sound/soc/codecs/nau8825.c
@@ -1271,6 +1271,36 @@ static int nau8825_i2c_remove(struct i2c_client *client)
return 0;
}
+#ifdef CONFIG_PM_SLEEP
+static int nau8825_suspend(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct nau8825 *nau8825 = dev_get_drvdata(dev);
+
+ disable_irq(client->irq);
+ regcache_cache_only(nau8825->regmap, true);
+ regcache_mark_dirty(nau8825->regmap);
+
+ return 0;
+}
+
+static int nau8825_resume(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct nau8825 *nau8825 = dev_get_drvdata(dev);
+
+ regcache_cache_only(nau8825->regmap, false);
+ regcache_sync(nau8825->regmap);
+ enable_irq(client->irq);
+
+ return 0;
+}
+#endif
+
+static const struct dev_pm_ops nau8825_pm = {
+ SET_SYSTEM_SLEEP_PM_OPS(nau8825_suspend, nau8825_resume)
+};
+
static const struct i2c_device_id nau8825_i2c_ids[] = {
{ "nau8825", 0 },
{ }
@@ -1297,6 +1327,7 @@ static struct i2c_driver nau8825_driver = {
.name = "nau8825",
.of_match_table = of_match_ptr(nau8825_of_ids),
.acpi_match_table = ACPI_PTR(nau8825_acpi_match),
+ .pm = &nau8825_pm,
},
.probe = nau8825_i2c_probe,
.remove = nau8825_i2c_remove,
diff --git a/sound/soc/codecs/rl6231.c b/sound/soc/codecs/rl6231.c
index aca479fa7670..1dc68ab08a17 100644
--- a/sound/soc/codecs/rl6231.c
+++ b/sound/soc/codecs/rl6231.c
@@ -80,8 +80,10 @@ int rl6231_calc_dmic_clk(int rate)
}
for (i = 0; i < ARRAY_SIZE(div); i++) {
- /* find divider that gives DMIC frequency below 3MHz */
- if (3000000 * div[i] >= rate)
+ if ((div[i] % 3) == 0)
+ continue;
+ /* find divider that gives DMIC frequency below 3.072MHz */
+ if (3072000 * div[i] >= rate)
return i;
}
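A worked example of the adjusted divider search (divider table as in rl6231.c): for a 24.576 MHz source it skips 3, 6 and 12, and the 3.072 MHz * div >= rate test first passes at div = 8, giving a DMIC clock of exactly 3.072 MHz:

#include <stdio.h>

int main(void)
{
	const int div[] = { 2, 3, 4, 6, 8, 12 };
	const int rate = 24576000;

	for (unsigned i = 0; i < sizeof(div) / sizeof(div[0]); i++) {
		if (div[i] % 3 == 0)
			continue;	/* skipped by the fix */
		if (3072000 * div[i] >= rate) {
			printf("div=%d -> %d Hz\n", div[i], rate / div[i]);
			break;
		}
	}
	return 0;
}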
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 28132375e427..3e3c7f6be29d 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -245,7 +245,7 @@ struct rt5645_priv {
struct snd_soc_jack *hp_jack;
struct snd_soc_jack *mic_jack;
struct snd_soc_jack *btn_jack;
- struct delayed_work jack_detect_work;
+ struct delayed_work jack_detect_work, rcclock_work;
struct regulator_bulk_data supplies[ARRAY_SIZE(rt5645_supply_names)];
struct rt5645_eq_param_s *eq_param;
@@ -565,12 +565,33 @@ static int rt5645_hweq_put(struct snd_kcontrol *kcontrol,
.put = rt5645_hweq_put \
}
+static int rt5645_spk_put_volsw(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+{
+ struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+ struct rt5645_priv *rt5645 = snd_soc_component_get_drvdata(component);
+ int ret;
+
+ cancel_delayed_work_sync(&rt5645->rcclock_work);
+
+ regmap_update_bits(rt5645->regmap, RT5645_MICBIAS,
+ RT5645_PWR_CLK25M_MASK, RT5645_PWR_CLK25M_PU);
+
+ ret = snd_soc_put_volsw(kcontrol, ucontrol);
+
+ queue_delayed_work(system_power_efficient_wq, &rt5645->rcclock_work,
+ msecs_to_jiffies(200));
+
+ return ret;
+}
+
static const struct snd_kcontrol_new rt5645_snd_controls[] = {
/* Speaker Output Volume */
SOC_DOUBLE("Speaker Channel Switch", RT5645_SPK_VOL,
RT5645_VOL_L_SFT, RT5645_VOL_R_SFT, 1, 1),
- SOC_DOUBLE_TLV("Speaker Playback Volume", RT5645_SPK_VOL,
- RT5645_L_VOL_SFT, RT5645_R_VOL_SFT, 39, 1, out_vol_tlv),
+ SOC_DOUBLE_EXT_TLV("Speaker Playback Volume", RT5645_SPK_VOL,
+ RT5645_L_VOL_SFT, RT5645_R_VOL_SFT, 39, 1, snd_soc_get_volsw,
+ rt5645_spk_put_volsw, out_vol_tlv),
/* ClassD modulator Speaker Gain Ratio */
SOC_SINGLE_TLV("Speaker ClassD Playback Volume", RT5645_SPO_CLSD_RATIO,
@@ -1498,7 +1519,7 @@ static void hp_amp_power(struct snd_soc_codec *codec, int on)
regmap_write(rt5645->regmap, RT5645_PR_BASE +
RT5645_MAMP_INT_REG2, 0xfc00);
snd_soc_write(codec, RT5645_DEPOP_M2, 0x1140);
- msleep(40);
+ msleep(70);
rt5645->hp_on = true;
} else {
/* depop parameters */
@@ -1646,9 +1667,13 @@ static int rt5645_spk_event(struct snd_soc_dapm_widget *w,
RT5645_PWR_CLS_D_L,
RT5645_PWR_CLS_D | RT5645_PWR_CLS_D_R |
RT5645_PWR_CLS_D_L);
+ snd_soc_update_bits(codec, RT5645_GEN_CTRL3,
+ RT5645_DET_CLK_MASK, RT5645_DET_CLK_MODE1);
break;
case SND_SOC_DAPM_PRE_PMD:
+ snd_soc_update_bits(codec, RT5645_GEN_CTRL3,
+ RT5645_DET_CLK_MASK, RT5645_DET_CLK_DIS);
snd_soc_write(codec, RT5645_EQ_CTRL2, 0);
snd_soc_update_bits(codec, RT5645_PWR_DIG1,
RT5645_PWR_CLS_D | RT5645_PWR_CLS_D_R |
@@ -3122,6 +3147,15 @@ static void rt5645_jack_detect_work(struct work_struct *work)
SND_JACK_BTN_2 | SND_JACK_BTN_3);
}
+static void rt5645_rcclock_work(struct work_struct *work)
+{
+ struct rt5645_priv *rt5645 =
+ container_of(work, struct rt5645_priv, rcclock_work.work);
+
+ regmap_update_bits(rt5645->regmap, RT5645_MICBIAS,
+ RT5645_PWR_CLK25M_MASK, RT5645_PWR_CLK25M_PD);
+}
+
static irqreturn_t rt5645_irq(int irq, void *data)
{
struct rt5645_priv *rt5645 = data;
@@ -3348,6 +3382,27 @@ static const struct dmi_system_id dmi_platform_intel_braswell[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Reks"),
},
},
+ {
+ .ident = "Google Edgar",
+ .callback = strago_quirk_cb,
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "Edgar"),
+ },
+ },
+ {
+ .ident = "Google Wizpig",
+ .callback = strago_quirk_cb,
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "Wizpig"),
+ },
+ },
+ {
+ .ident = "Google Terra",
+ .callback = strago_quirk_cb,
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "Terra"),
+ },
+ },
{ }
};
@@ -3587,6 +3642,7 @@ static int rt5645_i2c_probe(struct i2c_client *i2c,
}
INIT_DELAYED_WORK(&rt5645->jack_detect_work, rt5645_jack_detect_work);
+ INIT_DELAYED_WORK(&rt5645->rcclock_work, rt5645_rcclock_work);
if (rt5645->i2c->irq) {
ret = request_threaded_irq(rt5645->i2c->irq, NULL, rt5645_irq,
@@ -3621,6 +3677,7 @@ static int rt5645_i2c_remove(struct i2c_client *i2c)
free_irq(i2c->irq, rt5645);
cancel_delayed_work_sync(&rt5645->jack_detect_work);
+ cancel_delayed_work_sync(&rt5645->rcclock_work);
snd_soc_unregister_codec(&i2c->dev);
regulator_bulk_disable(ARRAY_SIZE(rt5645->supplies), rt5645->supplies);
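The rcclock additions above are a debounce: every speaker-volume write cancels the pending power-down, forces the 25 MHz RC clock on, and re-arms a 200 ms delayed work that powers the clock down once the writes stop. A runnable toy model of that timing (the driver does it with cancel_delayed_work_sync() and queue_delayed_work()):

#include <stdio.h>

static long deadline_ms = -1;		/* pending power-down, if any */

static void control_write(long now_ms)
{
	/* the driver forces the RC clock on here */
	deadline_ms = now_ms + 200;	/* re-arm the delayed power-down */
}

static void tick(long now_ms)
{
	if (deadline_ms >= 0 && now_ms >= deadline_ms) {
		printf("%ld ms: power down RC clock\n", now_ms);
		deadline_ms = -1;
	}
}

int main(void)
{
	control_write(0);		/* first write at t=0 */
	control_write(150);		/* burst: deadline moves to 350 */
	for (long t = 0; t <= 400; t += 50)
		tick(t);		/* fires once, at t=350 */
	return 0;
}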
diff --git a/sound/soc/codecs/rt5645.h b/sound/soc/codecs/rt5645.h
index 093e46d559fb..205e0715c99a 100644
--- a/sound/soc/codecs/rt5645.h
+++ b/sound/soc/codecs/rt5645.h
@@ -2122,6 +2122,10 @@ enum {
/* General Control3 (0xfc) */
#define RT5645_JD_PSV_MODE (0x1 << 12)
#define RT5645_IRQ_CLK_GATE_CTRL (0x1 << 11)
+#define RT5645_DET_CLK_MASK (0x3 << 9)
+#define RT5645_DET_CLK_DIS (0x0 << 9)
+#define RT5645_DET_CLK_MODE1 (0x1 << 9)
+#define RT5645_DET_CLK_MODE2 (0x2 << 9)
#define RT5645_MICINDET_MANU (0x1 << 7)
#define RT5645_RING2_SLEEVE_GND (0x1 << 5)
diff --git a/sound/soc/codecs/rt5670.h b/sound/soc/codecs/rt5670.h
index dc2b46236c5c..3f1b0f1df809 100644
--- a/sound/soc/codecs/rt5670.h
+++ b/sound/soc/codecs/rt5670.h
@@ -973,12 +973,12 @@
#define RT5670_SCLK_SRC_MCLK (0x0 << 14)
#define RT5670_SCLK_SRC_PLL1 (0x1 << 14)
#define RT5670_SCLK_SRC_RCCLK (0x2 << 14) /* 15MHz */
-#define RT5670_PLL1_SRC_MASK (0x3 << 12)
-#define RT5670_PLL1_SRC_SFT 12
-#define RT5670_PLL1_SRC_MCLK (0x0 << 12)
-#define RT5670_PLL1_SRC_BCLK1 (0x1 << 12)
-#define RT5670_PLL1_SRC_BCLK2 (0x2 << 12)
-#define RT5670_PLL1_SRC_BCLK3 (0x3 << 12)
+#define RT5670_PLL1_SRC_MASK (0x7 << 11)
+#define RT5670_PLL1_SRC_SFT 11
+#define RT5670_PLL1_SRC_MCLK (0x0 << 11)
+#define RT5670_PLL1_SRC_BCLK1 (0x1 << 11)
+#define RT5670_PLL1_SRC_BCLK2 (0x2 << 11)
+#define RT5670_PLL1_SRC_BCLK3 (0x3 << 11)
#define RT5670_PLL1_PD_MASK (0x1 << 3)
#define RT5670_PLL1_PD_SFT 3
#define RT5670_PLL1_PD_1 (0x0 << 3)
diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c
index b4cd7e3bf5f8..69d987a9935c 100644
--- a/sound/soc/codecs/rt5677.c
+++ b/sound/soc/codecs/rt5677.c
@@ -1386,90 +1386,90 @@ static const struct snd_kcontrol_new rt5677_dac_r_mix[] = {
};
static const struct snd_kcontrol_new rt5677_sto1_dac_l_mix[] = {
- SOC_DAPM_SINGLE("ST L Switch", RT5677_STO1_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("ST L Switch", RT5677_STO1_DAC_MIXER,
RT5677_M_ST_DAC1_L_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC1 L Switch", RT5677_STO1_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC1 L Switch", RT5677_STO1_DAC_MIXER,
RT5677_M_DAC1_L_STO_L_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC2 L Switch", RT5677_STO1_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC2 L Switch", RT5677_STO1_DAC_MIXER,
RT5677_M_DAC2_L_STO_L_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC1 R Switch", RT5677_STO1_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC1 R Switch", RT5677_STO1_DAC_MIXER,
RT5677_M_DAC1_R_STO_L_SFT, 1, 1),
};
static const struct snd_kcontrol_new rt5677_sto1_dac_r_mix[] = {
- SOC_DAPM_SINGLE("ST R Switch", RT5677_STO1_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("ST R Switch", RT5677_STO1_DAC_MIXER,
RT5677_M_ST_DAC1_R_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC1 R Switch", RT5677_STO1_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC1 R Switch", RT5677_STO1_DAC_MIXER,
RT5677_M_DAC1_R_STO_R_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC2 R Switch", RT5677_STO1_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC2 R Switch", RT5677_STO1_DAC_MIXER,
RT5677_M_DAC2_R_STO_R_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC1 L Switch", RT5677_STO1_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC1 L Switch", RT5677_STO1_DAC_MIXER,
RT5677_M_DAC1_L_STO_R_SFT, 1, 1),
};
static const struct snd_kcontrol_new rt5677_mono_dac_l_mix[] = {
- SOC_DAPM_SINGLE("ST L Switch", RT5677_MONO_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("ST L Switch", RT5677_MONO_DAC_MIXER,
RT5677_M_ST_DAC2_L_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC1 L Switch", RT5677_MONO_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC1 L Switch", RT5677_MONO_DAC_MIXER,
RT5677_M_DAC1_L_MONO_L_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC2 L Switch", RT5677_MONO_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC2 L Switch", RT5677_MONO_DAC_MIXER,
RT5677_M_DAC2_L_MONO_L_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC2 R Switch", RT5677_MONO_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC2 R Switch", RT5677_MONO_DAC_MIXER,
RT5677_M_DAC2_R_MONO_L_SFT, 1, 1),
};
static const struct snd_kcontrol_new rt5677_mono_dac_r_mix[] = {
- SOC_DAPM_SINGLE("ST R Switch", RT5677_MONO_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("ST R Switch", RT5677_MONO_DAC_MIXER,
RT5677_M_ST_DAC2_R_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC1 R Switch", RT5677_MONO_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC1 R Switch", RT5677_MONO_DAC_MIXER,
RT5677_M_DAC1_R_MONO_R_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC2 R Switch", RT5677_MONO_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC2 R Switch", RT5677_MONO_DAC_MIXER,
RT5677_M_DAC2_R_MONO_R_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC2 L Switch", RT5677_MONO_DAC_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC2 L Switch", RT5677_MONO_DAC_MIXER,
RT5677_M_DAC2_L_MONO_R_SFT, 1, 1),
};
static const struct snd_kcontrol_new rt5677_dd1_l_mix[] = {
- SOC_DAPM_SINGLE("Sto DAC Mix L Switch", RT5677_DD1_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("Sto DAC Mix L Switch", RT5677_DD1_MIXER,
RT5677_M_STO_L_DD1_L_SFT, 1, 1),
- SOC_DAPM_SINGLE("Mono DAC Mix L Switch", RT5677_DD1_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("Mono DAC Mix L Switch", RT5677_DD1_MIXER,
RT5677_M_MONO_L_DD1_L_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC3 L Switch", RT5677_DD1_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC3 L Switch", RT5677_DD1_MIXER,
RT5677_M_DAC3_L_DD1_L_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC3 R Switch", RT5677_DD1_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC3 R Switch", RT5677_DD1_MIXER,
RT5677_M_DAC3_R_DD1_L_SFT, 1, 1),
};
static const struct snd_kcontrol_new rt5677_dd1_r_mix[] = {
- SOC_DAPM_SINGLE("Sto DAC Mix R Switch", RT5677_DD1_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("Sto DAC Mix R Switch", RT5677_DD1_MIXER,
RT5677_M_STO_R_DD1_R_SFT, 1, 1),
- SOC_DAPM_SINGLE("Mono DAC Mix R Switch", RT5677_DD1_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("Mono DAC Mix R Switch", RT5677_DD1_MIXER,
RT5677_M_MONO_R_DD1_R_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC3 R Switch", RT5677_DD1_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC3 R Switch", RT5677_DD1_MIXER,
RT5677_M_DAC3_R_DD1_R_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC3 L Switch", RT5677_DD1_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC3 L Switch", RT5677_DD1_MIXER,
RT5677_M_DAC3_L_DD1_R_SFT, 1, 1),
};
static const struct snd_kcontrol_new rt5677_dd2_l_mix[] = {
- SOC_DAPM_SINGLE("Sto DAC Mix L Switch", RT5677_DD2_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("Sto DAC Mix L Switch", RT5677_DD2_MIXER,
RT5677_M_STO_L_DD2_L_SFT, 1, 1),
- SOC_DAPM_SINGLE("Mono DAC Mix L Switch", RT5677_DD2_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("Mono DAC Mix L Switch", RT5677_DD2_MIXER,
RT5677_M_MONO_L_DD2_L_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC4 L Switch", RT5677_DD2_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC4 L Switch", RT5677_DD2_MIXER,
RT5677_M_DAC4_L_DD2_L_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC4 R Switch", RT5677_DD2_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC4 R Switch", RT5677_DD2_MIXER,
RT5677_M_DAC4_R_DD2_L_SFT, 1, 1),
};
static const struct snd_kcontrol_new rt5677_dd2_r_mix[] = {
- SOC_DAPM_SINGLE("Sto DAC Mix R Switch", RT5677_DD2_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("Sto DAC Mix R Switch", RT5677_DD2_MIXER,
RT5677_M_STO_R_DD2_R_SFT, 1, 1),
- SOC_DAPM_SINGLE("Mono DAC Mix R Switch", RT5677_DD2_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("Mono DAC Mix R Switch", RT5677_DD2_MIXER,
RT5677_M_MONO_R_DD2_R_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC4 R Switch", RT5677_DD2_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC4 R Switch", RT5677_DD2_MIXER,
RT5677_M_DAC4_R_DD2_R_SFT, 1, 1),
- SOC_DAPM_SINGLE("DAC4 L Switch", RT5677_DD2_MIXER,
+ SOC_DAPM_SINGLE_AUTODISABLE("DAC4 L Switch", RT5677_DD2_MIXER,
RT5677_M_DAC4_L_DD2_R_SFT, 1, 1),
};
@@ -2596,6 +2596,21 @@ static int rt5677_vref_event(struct snd_soc_dapm_widget *w,
return 0;
}
+static int rt5677_filter_power_event(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+{
+ switch (event) {
+ case SND_SOC_DAPM_POST_PMU:
+ msleep(50);
+ break;
+
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+
static const struct snd_soc_dapm_widget rt5677_dapm_widgets[] = {
SND_SOC_DAPM_SUPPLY("PLL1", RT5677_PWR_ANLG2, RT5677_PWR_PLL1_BIT,
0, rt5677_set_pll1_event, SND_SOC_DAPM_PRE_PMU |
@@ -3072,19 +3087,26 @@ static const struct snd_soc_dapm_widget rt5677_dapm_widgets[] = {
/* DAC Mixer */
SND_SOC_DAPM_SUPPLY("dac stereo1 filter", RT5677_PWR_DIG2,
- RT5677_PWR_DAC_S1F_BIT, 0, NULL, 0),
+ RT5677_PWR_DAC_S1F_BIT, 0, rt5677_filter_power_event,
+ SND_SOC_DAPM_POST_PMU),
SND_SOC_DAPM_SUPPLY("dac mono2 left filter", RT5677_PWR_DIG2,
- RT5677_PWR_DAC_M2F_L_BIT, 0, NULL, 0),
+ RT5677_PWR_DAC_M2F_L_BIT, 0, rt5677_filter_power_event,
+ SND_SOC_DAPM_POST_PMU),
SND_SOC_DAPM_SUPPLY("dac mono2 right filter", RT5677_PWR_DIG2,
- RT5677_PWR_DAC_M2F_R_BIT, 0, NULL, 0),
+ RT5677_PWR_DAC_M2F_R_BIT, 0, rt5677_filter_power_event,
+ SND_SOC_DAPM_POST_PMU),
SND_SOC_DAPM_SUPPLY("dac mono3 left filter", RT5677_PWR_DIG2,
- RT5677_PWR_DAC_M3F_L_BIT, 0, NULL, 0),
+ RT5677_PWR_DAC_M3F_L_BIT, 0, rt5677_filter_power_event,
+ SND_SOC_DAPM_POST_PMU),
SND_SOC_DAPM_SUPPLY("dac mono3 right filter", RT5677_PWR_DIG2,
- RT5677_PWR_DAC_M3F_R_BIT, 0, NULL, 0),
+ RT5677_PWR_DAC_M3F_R_BIT, 0, rt5677_filter_power_event,
+ SND_SOC_DAPM_POST_PMU),
SND_SOC_DAPM_SUPPLY("dac mono4 left filter", RT5677_PWR_DIG2,
- RT5677_PWR_DAC_M4F_L_BIT, 0, NULL, 0),
+ RT5677_PWR_DAC_M4F_L_BIT, 0, rt5677_filter_power_event,
+ SND_SOC_DAPM_POST_PMU),
SND_SOC_DAPM_SUPPLY("dac mono4 right filter", RT5677_PWR_DIG2,
- RT5677_PWR_DAC_M4F_R_BIT, 0, NULL, 0),
+ RT5677_PWR_DAC_M4F_R_BIT, 0, rt5677_filter_power_event,
+ SND_SOC_DAPM_POST_PMU),
SND_SOC_DAPM_MIXER("Stereo DAC MIXL", SND_SOC_NOPM, 0, 0,
rt5677_sto1_dac_l_mix, ARRAY_SIZE(rt5677_sto1_dac_l_mix)),
diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c
index f540f82b1f27..08b40460663c 100644
--- a/sound/soc/codecs/sgtl5000.c
+++ b/sound/soc/codecs/sgtl5000.c
@@ -189,6 +189,7 @@ static int power_vag_event(struct snd_soc_dapm_widget *w,
case SND_SOC_DAPM_POST_PMU:
snd_soc_update_bits(codec, SGTL5000_CHIP_ANA_POWER,
SGTL5000_VAG_POWERUP, SGTL5000_VAG_POWERUP);
+ msleep(400);
break;
case SND_SOC_DAPM_PRE_PMD:
diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c
index 056375339ea3..5380798883b5 100644
--- a/sound/soc/codecs/wm8960.c
+++ b/sound/soc/codecs/wm8960.c
@@ -229,7 +229,7 @@ SOC_DOUBLE_R_TLV("Capture Volume", WM8960_LINVOL, WM8960_RINVOL,
SOC_DOUBLE_R("Capture Volume ZC Switch", WM8960_LINVOL, WM8960_RINVOL,
6, 1, 0),
SOC_DOUBLE_R("Capture Switch", WM8960_LINVOL, WM8960_RINVOL,
- 7, 1, 0),
+ 7, 1, 1),
SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT3 Volume",
WM8960_INBMIX1, 4, 7, 0, lineinboost_tlv),
diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
index 39ebd7bf4f53..a7e79784fc16 100644
--- a/sound/soc/codecs/wm8962.c
+++ b/sound/soc/codecs/wm8962.c
@@ -365,8 +365,8 @@ static const struct reg_default wm8962_reg[] = {
{ 16924, 0x0059 }, /* R16924 - HDBASS_PG_1 */
{ 16925, 0x999A }, /* R16925 - HDBASS_PG_0 */
- { 17048, 0x0083 }, /* R17408 - HPF_C_1 */
- { 17049, 0x98AD }, /* R17409 - HPF_C_0 */
+ { 17408, 0x0083 }, /* R17408 - HPF_C_1 */
+ { 17409, 0x98AD }, /* R17409 - HPF_C_0 */
{ 17920, 0x007F }, /* R17920 - ADCL_RETUNE_C1_1 */
{ 17921, 0xFFFF }, /* R17921 - ADCL_RETUNE_C1_0 */
diff --git a/sound/soc/codecs/wm8974.c b/sound/soc/codecs/wm8974.c
index 0a60677397b3..4c29bd2ae75c 100644
--- a/sound/soc/codecs/wm8974.c
+++ b/sound/soc/codecs/wm8974.c
@@ -574,6 +574,7 @@ static const struct regmap_config wm8974_regmap = {
.max_register = WM8974_MONOMIX,
.reg_defaults = wm8974_reg_defaults,
.num_reg_defaults = ARRAY_SIZE(wm8974_reg_defaults),
+ .cache_type = REGCACHE_FLAT,
};
static int wm8974_probe(struct snd_soc_codec *codec)
diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c
index 4495a40a9468..2ccb8bccc9d4 100644
--- a/sound/soc/davinci/davinci-mcasp.c
+++ b/sound/soc/davinci/davinci-mcasp.c
@@ -223,8 +223,8 @@ static void mcasp_start_tx(struct davinci_mcasp *mcasp)
/* wait for XDATA to be cleared */
cnt = 0;
- while (!(mcasp_get_reg(mcasp, DAVINCI_MCASP_TXSTAT_REG) &
- ~XRDATA) && (cnt < 100000))
+ while ((mcasp_get_reg(mcasp, DAVINCI_MCASP_TXSTAT_REG) & XRDATA) &&
+ (cnt < 100000))
cnt++;
/* Release TX state machine */
@@ -681,8 +681,8 @@ static int davinci_mcasp_set_tdm_slot(struct snd_soc_dai *dai,
}
mcasp->tdm_slots = slots;
- mcasp->tdm_mask[SNDRV_PCM_STREAM_PLAYBACK] = rx_mask;
- mcasp->tdm_mask[SNDRV_PCM_STREAM_CAPTURE] = tx_mask;
+ mcasp->tdm_mask[SNDRV_PCM_STREAM_PLAYBACK] = tx_mask;
+ mcasp->tdm_mask[SNDRV_PCM_STREAM_CAPTURE] = rx_mask;
mcasp->slot_width = slot_width;
return davinci_mcasp_set_ch_constraints(mcasp);
@@ -908,6 +908,14 @@ static int mcasp_i2s_hw_param(struct davinci_mcasp *mcasp, int stream,
mcasp_set_bits(mcasp, DAVINCI_MCASP_RXFMT_REG, busel | RXORD);
mcasp_mod_bits(mcasp, DAVINCI_MCASP_RXFMCTL_REG,
FSRMOD(total_slots), FSRMOD(0x1FF));
+ /*
+ * If McASP is set to be TX/RX synchronous and playback is not
+ * already running, we need to configure the TX slots in order
+ * to have a correct FSX on the bus
+ */
+ if (mcasp_is_synchronous(mcasp) && !mcasp->channels)
+ mcasp_mod_bits(mcasp, DAVINCI_MCASP_TXFMCTL_REG,
+ FSXMOD(total_slots), FSXMOD(0x1FF));
}
return 0;
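The XDATA wait fix above is subtle: the old condition !(stat & ~XRDATA) keeps waiting while all the *other* status bits are clear, not while XRDATA itself is set. Standalone demonstration, taking XRDATA as bit 5 per davinci-mcasp.h:

#include <stdio.h>

#define XRDATA (1u << 5)

static void show(unsigned stat)
{
	printf("stat=0x%02x  old-keeps-waiting=%d  new-keeps-waiting=%d\n",
	       stat, !(stat & ~XRDATA), (stat & XRDATA) != 0);
}

int main(void)
{
	show(XRDATA);		/* data pending, nothing else: both wait */
	show(XRDATA | 0x1);	/* old stops too early, new keeps waiting */
	show(0x0);		/* data drained: old spins on, new stops */
	return 0;
}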
diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index 19c302b0d763..14dfdee05fd5 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -283,6 +283,8 @@ config SND_SOC_IMX_MC13783
config SND_SOC_FSL_ASOC_CARD
tristate "Generic ASoC Sound Card with ASRC support"
depends on OF && I2C
+ # enforce SND_SOC_FSL_ASOC_CARD=m if SND_AC97_CODEC=m:
+ depends on SND_AC97_CODEC || SND_AC97_CODEC=n
select SND_SOC_IMX_AUDMUX
select SND_SOC_IMX_PCM_DMA
select SND_SOC_FSL_ESAI
diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index a4435f5e3be9..08b460ba06ef 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -454,7 +454,8 @@ static int fsl_sai_trigger(struct snd_pcm_substream *substream, int cmd,
* Rx sync with Tx clocks: Clear SYNC for Tx, set it for Rx.
* Tx sync with Rx clocks: Clear SYNC for Rx, set it for Tx.
*/
- regmap_update_bits(sai->regmap, FSL_SAI_TCR2, FSL_SAI_CR2_SYNC, 0);
+ regmap_update_bits(sai->regmap, FSL_SAI_TCR2, FSL_SAI_CR2_SYNC,
+ sai->synchronous[TX] ? FSL_SAI_CR2_SYNC : 0);
regmap_update_bits(sai->regmap, FSL_SAI_RCR2, FSL_SAI_CR2_SYNC,
sai->synchronous[RX] ? FSL_SAI_CR2_SYNC : 0);
@@ -504,6 +505,24 @@ static int fsl_sai_trigger(struct snd_pcm_substream *substream, int cmd,
FSL_SAI_CSR_FR, FSL_SAI_CSR_FR);
regmap_update_bits(sai->regmap, FSL_SAI_RCSR,
FSL_SAI_CSR_FR, FSL_SAI_CSR_FR);
+
+ /*
+ * In SAI master mode, after several open/close cycles the
+ * frame clock can disappear and never recover. Add a
+ * software reset to work around this; it is a hardware
+ * bug and will be fixed in the next SAI version.
+ */
+ if (!sai->is_slave_mode) {
+ /* Software Reset for both Tx and Rx */
+ regmap_write(sai->regmap,
+ FSL_SAI_TCSR, FSL_SAI_CSR_SR);
+ regmap_write(sai->regmap,
+ FSL_SAI_RCSR, FSL_SAI_CSR_SR);
+ /* Clear SR bit to finish the reset */
+ regmap_write(sai->regmap, FSL_SAI_TCSR, 0);
+ regmap_write(sai->regmap, FSL_SAI_RCSR, 0);
+ }
}
break;
default:
diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig
index 7b778ab85f8b..d430ef5a4f38 100644
--- a/sound/soc/intel/Kconfig
+++ b/sound/soc/intel/Kconfig
@@ -144,7 +144,7 @@ config SND_SOC_INTEL_SKYLAKE
config SND_SOC_INTEL_SKL_RT286_MACH
tristate "ASoC Audio driver for SKL with RT286 I2S mode"
- depends on X86 && ACPI
+ depends on X86 && ACPI && I2C
select SND_SOC_INTEL_SST
select SND_SOC_INTEL_SKYLAKE
select SND_SOC_RT286
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index a7854c8fc523..ad4d0f82603e 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -1248,5 +1248,7 @@ int skl_tplg_init(struct snd_soc_platform *platform, struct hdac_ext_bus *ebus)
skl->resource.max_mcps = SKL_MAX_MCPS;
skl->resource.max_mem = SKL_FW_MAX_MEM;
+ skl->tplg = fw;
+
return 0;
}
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index 5319529aedf7..caa69c4598a6 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -25,6 +25,7 @@
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/platform_device.h>
+#include <linux/firmware.h>
#include <sound/pcm.h>
#include "skl.h"
@@ -520,6 +521,9 @@ static void skl_remove(struct pci_dev *pci)
struct hdac_ext_bus *ebus = pci_get_drvdata(pci);
struct skl *skl = ebus_to_skl(ebus);
+ if (skl->tplg)
+ release_firmware(skl->tplg);
+
if (pci_dev_run_wake(pci))
pm_runtime_get_noresume(&pci->dev);
pci_dev_put(pci);
diff --git a/sound/soc/intel/skylake/skl.h b/sound/soc/intel/skylake/skl.h
index dd2e79ae45a8..a0709e344d44 100644
--- a/sound/soc/intel/skylake/skl.h
+++ b/sound/soc/intel/skylake/skl.h
@@ -68,6 +68,8 @@ struct skl {
struct skl_dsp_resource resource;
struct list_head ppl_list;
struct list_head dapm_path_list;
+
+ const struct firmware *tplg;
};
#define skl_to_ebus(s) (&(s)->ebus)
diff --git a/sound/soc/rockchip/rockchip_spdif.c b/sound/soc/rockchip/rockchip_spdif.c
index a38a3029062c..5a806da89f42 100644
--- a/sound/soc/rockchip/rockchip_spdif.c
+++ b/sound/soc/rockchip/rockchip_spdif.c
@@ -152,8 +152,10 @@ static int rk_spdif_trigger(struct snd_pcm_substream *substream,
case SNDRV_PCM_TRIGGER_RESUME:
case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
ret = regmap_update_bits(spdif->regmap, SPDIF_DMACR,
- SPDIF_DMACR_TDE_ENABLE,
- SPDIF_DMACR_TDE_ENABLE);
+ SPDIF_DMACR_TDE_ENABLE |
+ SPDIF_DMACR_TDL_MASK,
+ SPDIF_DMACR_TDE_ENABLE |
+ SPDIF_DMACR_TDL(16));
if (ret != 0)
return ret;
@@ -280,7 +282,7 @@ static int rk_spdif_probe(struct platform_device *pdev)
int ret;
match = of_match_node(rk_spdif_match, np);
- if ((int) match->data == RK_SPDIF_RK3288) {
+ if (match->data == (void *)RK_SPDIF_RK3288) {
struct regmap *grf;
grf = syscon_regmap_lookup_by_phandle(np, "rockchip,grf");
diff --git a/sound/soc/rockchip/rockchip_spdif.h b/sound/soc/rockchip/rockchip_spdif.h
index 07f86a21046a..3ef12770ae12 100644
--- a/sound/soc/rockchip/rockchip_spdif.h
+++ b/sound/soc/rockchip/rockchip_spdif.h
@@ -28,9 +28,9 @@
#define SPDIF_CFGR_VDW(x) (x << SPDIF_CFGR_VDW_SHIFT)
#define SDPIF_CFGR_VDW_MASK (0xf << SPDIF_CFGR_VDW_SHIFT)
-#define SPDIF_CFGR_VDW_16 SPDIF_CFGR_VDW(0x00)
-#define SPDIF_CFGR_VDW_20 SPDIF_CFGR_VDW(0x01)
-#define SPDIF_CFGR_VDW_24 SPDIF_CFGR_VDW(0x10)
+#define SPDIF_CFGR_VDW_16 SPDIF_CFGR_VDW(0x0)
+#define SPDIF_CFGR_VDW_20 SPDIF_CFGR_VDW(0x1)
+#define SPDIF_CFGR_VDW_24 SPDIF_CFGR_VDW(0x2)
/*
* DMACR
@@ -42,7 +42,7 @@
#define SPDIF_DMACR_TDL_SHIFT 0
#define SPDIF_DMACR_TDL(x) ((x) << SPDIF_DMACR_TDL_SHIFT)
-#define SPDIF_DMACR_TDL_MASK (0x1f << SDPIF_DMACR_TDL_SHIFT)
+#define SPDIF_DMACR_TDL_MASK (0x1f << SPDIF_DMACR_TDL_SHIFT)
/*
* XFER
diff --git a/sound/soc/sh/rcar/gen.c b/sound/soc/sh/rcar/gen.c
index 76da7620904c..edcf4cc2e84f 100644
--- a/sound/soc/sh/rcar/gen.c
+++ b/sound/soc/sh/rcar/gen.c
@@ -235,7 +235,7 @@ static int rsnd_gen2_probe(struct platform_device *pdev,
RSND_GEN_S_REG(SCU_SYS_STATUS0, 0x1c8),
RSND_GEN_S_REG(SCU_SYS_INT_EN0, 0x1cc),
RSND_GEN_S_REG(SCU_SYS_STATUS1, 0x1d0),
- RSND_GEN_S_REG(SCU_SYS_INT_EN1, 0x1c4),
+ RSND_GEN_S_REG(SCU_SYS_INT_EN1, 0x1d4),
RSND_GEN_M_REG(SRC_SWRSR, 0x200, 0x40),
RSND_GEN_M_REG(SRC_SRCIR, 0x204, 0x40),
RSND_GEN_M_REG(SRC_ADINR, 0x214, 0x40),
diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c
index 261b50217c48..68b439ed22d7 100644
--- a/sound/soc/sh/rcar/src.c
+++ b/sound/soc/sh/rcar/src.c
@@ -923,6 +923,7 @@ static int rsnd_src_pcm_new_gen2(struct rsnd_mod *mod,
struct snd_soc_pcm_runtime *rtd)
{
struct rsnd_dai *rdai = rsnd_io_to_rdai(io);
+ struct rsnd_mod *dvc = rsnd_io_to_mod_dvc(io);
struct rsnd_src *src = rsnd_mod_to_src(mod);
int ret;
@@ -937,6 +938,12 @@ static int rsnd_src_pcm_new_gen2(struct rsnd_mod *mod,
return 0;
/*
+ * SRC In doesn't work if DVC is enabled
+ */
+ if (dvc && !rsnd_io_is_play(io))
+ return 0;
+
+ /*
* enable sync convert
*/
ret = rsnd_kctrl_new_s(mod, io, rtd,
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 24b096066a07..a1305f827a98 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -795,12 +795,12 @@ static void soc_resume_deferred(struct work_struct *work)
dev_dbg(card->dev, "ASoC: resume work completed\n");
- /* userspace can access us now we are back as we were before */
- snd_power_change_state(card->snd_card, SNDRV_CTL_POWER_D0);
-
/* Recheck all endpoints too, their state is affected by suspend */
dapm_mark_endpoints_dirty(card);
snd_soc_dapm_sync(&card->dapm);
+
+ /* userspace can access us now we are back as we were before */
+ snd_power_change_state(card->snd_card, SNDRV_CTL_POWER_D0);
}
/* powers up audio subsystem after a suspend */
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 016eba10b1ec..7d009428934a 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -2293,6 +2293,12 @@ void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w)
kfree(w);
}
+void snd_soc_dapm_reset_cache(struct snd_soc_dapm_context *dapm)
+{
+ dapm->path_sink_cache.widget = NULL;
+ dapm->path_source_cache.widget = NULL;
+}
+
/* free all dapm widgets and resources */
static void dapm_free_widgets(struct snd_soc_dapm_context *dapm)
{
@@ -2303,6 +2309,7 @@ static void dapm_free_widgets(struct snd_soc_dapm_context *dapm)
continue;
snd_soc_dapm_free_widget(w);
}
+ snd_soc_dapm_reset_cache(dapm);
}
static struct snd_soc_dapm_widget *dapm_find_widget(
diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
index ecd38e52285a..2f67ba6d7a8f 100644
--- a/sound/soc/soc-ops.c
+++ b/sound/soc/soc-ops.c
@@ -404,7 +404,7 @@ EXPORT_SYMBOL_GPL(snd_soc_get_volsw_sx);
/**
* snd_soc_put_volsw_sx - double mixer set callback
* @kcontrol: mixer control
- * @uinfo: control element information
+ * @ucontrol: control element information
*
* Callback to set the value of a double mixer control that spans 2 registers.
*
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 8d7ec80af51b..6963ba20991c 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -531,7 +531,7 @@ static int soc_tplg_kcontrol_bind_io(struct snd_soc_tplg_ctl_hdr *hdr,
/* TLV bytes controls need standard kcontrol info handler,
* TLV callback and extended put/get handlers.
*/
- k->info = snd_soc_bytes_info;
+ k->info = snd_soc_bytes_info_ext;
k->tlv.c = snd_soc_bytes_tlv_callback;
ext_ops = tplg->bytes_ext_ops;
@@ -1805,6 +1805,7 @@ void snd_soc_tplg_widget_remove_all(struct snd_soc_dapm_context *dapm,
snd_soc_tplg_widget_remove(w);
snd_soc_dapm_free_widget(w);
}
+ snd_soc_dapm_reset_cache(dapm);
}
EXPORT_SYMBOL_GPL(snd_soc_tplg_widget_remove_all);
diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c
index 843f037a317d..5c2bc53f0a9b 100644
--- a/sound/soc/sti/uniperif_player.c
+++ b/sound/soc/sti/uniperif_player.c
@@ -669,6 +669,7 @@ static int uni_player_startup(struct snd_pcm_substream *substream,
{
struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai);
struct uniperif *player = priv->dai_data.uni;
+ player->substream = substream;
player->clk_adj = 0;
@@ -950,6 +951,8 @@ static void uni_player_shutdown(struct snd_pcm_substream *substream,
if (player->state != UNIPERIF_STATE_STOPPED)
/* Stop the player */
uni_player_stop(player);
+
+ player->substream = NULL;
}
static int uni_player_parse_dt_clk_glue(struct platform_device *pdev,
@@ -989,7 +992,7 @@ static int uni_player_parse_dt(struct platform_device *pdev,
if (!info)
return -ENOMEM;
- if (of_property_read_u32(pnode, "version", &player->ver) ||
+ if (of_property_read_u32(pnode, "st,version", &player->ver) ||
player->ver == SND_ST_UNIPERIF_VERSION_UNKNOWN) {
dev_err(dev, "Unknown uniperipheral version ");
return -EINVAL;
@@ -998,13 +1001,13 @@ static int uni_player_parse_dt(struct platform_device *pdev,
if (player->ver >= SND_ST_UNIPERIF_VERSION_UNI_PLR_TOP_1_0)
info->underflow_enabled = 1;
- if (of_property_read_u32(pnode, "uniperiph-id", &info->id)) {
+ if (of_property_read_u32(pnode, "st,uniperiph-id", &info->id)) {
dev_err(dev, "uniperipheral id not defined");
return -EINVAL;
}
/* Read the device mode property */
- if (of_property_read_string(pnode, "mode", &mode)) {
+ if (of_property_read_string(pnode, "st,mode", &mode)) {
dev_err(dev, "uniperipheral mode not defined");
return -EINVAL;
}
diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c
index f791239a3087..8a0eb2050169 100644
--- a/sound/soc/sti/uniperif_reader.c
+++ b/sound/soc/sti/uniperif_reader.c
@@ -316,7 +316,7 @@ static int uni_reader_parse_dt(struct platform_device *pdev,
if (!info)
return -ENOMEM;
- if (of_property_read_u32(node, "version", &reader->ver) ||
+ if (of_property_read_u32(node, "st,version", &reader->ver) ||
reader->ver == SND_ST_UNIPERIF_VERSION_UNKNOWN) {
dev_err(&pdev->dev, "Unknown uniperipheral version ");
return -EINVAL;
@@ -346,7 +346,6 @@ int uni_reader_init(struct platform_device *pdev,
reader->hw = &uni_reader_pcm_hw;
reader->dai_ops = &uni_reader_dai_ops;
- dev_err(reader->dev, "%s: enter\n", __func__);
ret = uni_reader_parse_dt(pdev, reader);
if (ret < 0) {
dev_err(reader->dev, "Failed to parse DeviceTree");
diff --git a/sound/soc/sunxi/sun4i-codec.c b/sound/soc/sunxi/sun4i-codec.c
index bcbf4da168b6..1bb896d78d09 100644
--- a/sound/soc/sunxi/sun4i-codec.c
+++ b/sound/soc/sunxi/sun4i-codec.c
@@ -2,6 +2,7 @@
* Copyright 2014 Emilio López <emilio@elopez.com.ar>
* Copyright 2014 Jon Smirl <jonsmirl@gmail.com>
* Copyright 2015 Maxime Ripard <maxime.ripard@free-electrons.com>
+ * Copyright 2015 Adam Sampson <ats@offog.org>
*
* Based on the Allwinner SDK driver, released under the GPL.
*
@@ -404,7 +405,7 @@ static const struct snd_kcontrol_new sun4i_codec_pa_mute =
static DECLARE_TLV_DB_SCALE(sun4i_codec_pa_volume_scale, -6300, 100, 1);
static const struct snd_kcontrol_new sun4i_codec_widgets[] = {
- SOC_SINGLE_TLV("PA Volume", SUN4I_CODEC_DAC_ACTL,
+ SOC_SINGLE_TLV("Power Amplifier Volume", SUN4I_CODEC_DAC_ACTL,
SUN4I_CODEC_DAC_ACTL_PA_VOL, 0x3F, 0,
sun4i_codec_pa_volume_scale),
};
@@ -452,12 +453,12 @@ static const struct snd_soc_dapm_widget sun4i_codec_dapm_widgets[] = {
SND_SOC_DAPM_SUPPLY("Mixer Enable", SUN4I_CODEC_DAC_ACTL,
SUN4I_CODEC_DAC_ACTL_MIXEN, 0, NULL, 0),
- /* Pre-Amplifier */
- SND_SOC_DAPM_MIXER("Pre-Amplifier", SUN4I_CODEC_ADC_ACTL,
+ /* Power Amplifier */
+ SND_SOC_DAPM_MIXER("Power Amplifier", SUN4I_CODEC_ADC_ACTL,
SUN4I_CODEC_ADC_ACTL_PA_EN, 0,
sun4i_codec_pa_mixer_controls,
ARRAY_SIZE(sun4i_codec_pa_mixer_controls)),
- SND_SOC_DAPM_SWITCH("Pre-Amplifier Mute", SND_SOC_NOPM, 0, 0,
+ SND_SOC_DAPM_SWITCH("Power Amplifier Mute", SND_SOC_NOPM, 0, 0,
&sun4i_codec_pa_mute),
SND_SOC_DAPM_OUTPUT("HP Right"),
@@ -480,16 +481,16 @@ static const struct snd_soc_dapm_route sun4i_codec_dapm_routes[] = {
{ "Left Mixer", NULL, "Mixer Enable" },
{ "Left Mixer", "Left DAC Playback Switch", "Left DAC" },
- /* Pre-Amplifier Mixer Routes */
- { "Pre-Amplifier", "Mixer Playback Switch", "Left Mixer" },
- { "Pre-Amplifier", "Mixer Playback Switch", "Right Mixer" },
- { "Pre-Amplifier", "DAC Playback Switch", "Left DAC" },
- { "Pre-Amplifier", "DAC Playback Switch", "Right DAC" },
+ /* Power Amplifier Routes */
+ { "Power Amplifier", "Mixer Playback Switch", "Left Mixer" },
+ { "Power Amplifier", "Mixer Playback Switch", "Right Mixer" },
+ { "Power Amplifier", "DAC Playback Switch", "Left DAC" },
+ { "Power Amplifier", "DAC Playback Switch", "Right DAC" },
- /* PA -> HP path */
- { "Pre-Amplifier Mute", "Switch", "Pre-Amplifier" },
- { "HP Right", NULL, "Pre-Amplifier Mute" },
- { "HP Left", NULL, "Pre-Amplifier Mute" },
+ /* Headphone Output Routes */
+ { "Power Amplifier Mute", "Switch", "Power Amplifier" },
+ { "HP Right", NULL, "Power Amplifier Mute" },
+ { "HP Left", NULL, "Power Amplifier Mute" },
};
static struct snd_soc_codec_driver sun4i_codec_codec = {
diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index 7661616f3636..5b4c58c3e2c5 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -174,6 +174,8 @@ struct snd_usb_midi_in_endpoint {
u8 running_status_length;
} ports[0x10];
u8 seen_f5;
+ bool in_sysex;
+ u8 last_cin;
u8 error_resubmit;
int current_port;
};
@@ -468,6 +470,39 @@ static void snd_usbmidi_maudio_broken_running_status_input(
}
/*
+ * QinHeng CH345 is buggy: every second packet inside a SysEx does not carry
+ * CIN 4 but the previously seen CIN, while still carrying three data bytes.
+ */
+static void ch345_broken_sysex_input(struct snd_usb_midi_in_endpoint *ep,
+ uint8_t *buffer, int buffer_length)
+{
+ unsigned int i, cin, length;
+
+ for (i = 0; i + 3 < buffer_length; i += 4) {
+ if (buffer[i] == 0 && i > 0)
+ break;
+ cin = buffer[i] & 0x0f;
+ if (ep->in_sysex &&
+ cin == ep->last_cin &&
+ (buffer[i + 1 + (cin == 0x6)] & 0x80) == 0)
+ cin = 0x4;
+#if 0
+ if (buffer[i + 1] == 0x90) {
+ /*
+ * Either a corrupted running status or a real note-on
+ * message; impossible to detect reliably.
+ */
+ }
+#endif
+ length = snd_usbmidi_cin_length[cin];
+ snd_usbmidi_input_data(ep, 0, &buffer[i + 1], length);
+ ep->in_sysex = cin == 0x4;
+ if (!ep->in_sysex)
+ ep->last_cin = cin;
+ }
+}
+
+/*
* CME protocol: like the standard protocol, but SysEx commands are sent as a
* single USB packet preceded by a 0x0F byte.
*/
@@ -660,6 +695,12 @@ static struct usb_protocol_ops snd_usbmidi_cme_ops = {
.output_packet = snd_usbmidi_output_standard_packet,
};
+static struct usb_protocol_ops snd_usbmidi_ch345_broken_sysex_ops = {
+ .input = ch345_broken_sysex_input,
+ .output = snd_usbmidi_standard_output,
+ .output_packet = snd_usbmidi_output_standard_packet,
+};
+
/*
* AKAI MPD16 protocol:
*
@@ -1341,6 +1382,7 @@ static int snd_usbmidi_out_endpoint_create(struct snd_usb_midi *umidi,
* Various chips declare a packet size larger than 4 bytes, but
* do not actually work with larger packets:
*/
+ case USB_ID(0x0a67, 0x5011): /* Medeli DD305 */
case USB_ID(0x0a92, 0x1020): /* ESI M4U */
case USB_ID(0x1430, 0x474b): /* RedOctane GH MIDI INTERFACE */
case USB_ID(0x15ca, 0x0101): /* Textech USB Midi Cable */
@@ -2378,6 +2420,10 @@ int snd_usbmidi_create(struct snd_card *card,
err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints);
break;
+ case QUIRK_MIDI_CH345:
+ umidi->usb_protocol_ops = &snd_usbmidi_ch345_broken_sysex_ops;
+ err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints);
+ break;
default:
dev_err(&umidi->dev->dev, "invalid quirk type %d\n",
quirk->type);
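For context on the CH345 workaround above: USB MIDI moves data in 4-byte event packets, and the low nibble of byte 0 (the Code Index Number) selects how many of the remaining three bytes are MIDI payload; the broken device keeps reporting the pre-SysEx CIN on continuation packets. A hypothetical decoder using the standard CIN-to-length table (the same values as snd_usbmidi_cin_length):

#include <stdint.h>
#include <stdio.h>

static const uint8_t cin_length[] = {
	0, 0, 2, 3, 3, 1, 2, 3, 3, 3, 3, 3, 2, 2, 3, 1
};

static void decode(const uint8_t *buf, int len)
{
	for (int i = 0; i + 3 < len; i += 4) {
		uint8_t cin = buf[i] & 0x0f;

		printf("CIN %#x: %d data byte(s)\n", cin, cin_length[cin]);
	}
}

int main(void)
{
	const uint8_t pkt[] = { 0x04, 0xf0, 0x01, 0x02,	/* SysEx start */
				0x05, 0xf7, 0x00, 0x00 };	/* SysEx end */

	decode(pkt, sizeof(pkt));
	return 0;
}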
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index f494dced3c11..4f85757009b3 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -1354,6 +1354,8 @@ static void build_feature_ctl(struct mixer_build *state, void *raw_desc,
}
}
+ snd_usb_mixer_fu_apply_quirk(state->mixer, cval, unitid, kctl);
+
range = (cval->max - cval->min) / cval->res;
/*
* Are there devices with volume range more than 255? I use a bit more
diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c
index 6a803eff87f7..ddca6547399b 100644
--- a/sound/usb/mixer_maps.c
+++ b/sound/usb/mixer_maps.c
@@ -348,13 +348,6 @@ static struct usbmix_name_map bose_companion5_map[] = {
{ 0 } /* terminator */
};
-/* Dragonfly DAC 1.2, the dB conversion factor is 1 instead of 256 */
-static struct usbmix_dB_map dragonfly_1_2_dB = {0, 5000};
-static struct usbmix_name_map dragonfly_1_2_map[] = {
- { 7, NULL, .dB = &dragonfly_1_2_dB },
- { 0 } /* terminator */
-};
-
/*
* Control map entries
*/
@@ -470,11 +463,6 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = {
.id = USB_ID(0x05a7, 0x1020),
.map = bose_companion5_map,
},
- {
- /* Dragonfly DAC 1.2 */
- .id = USB_ID(0x21b4, 0x0081),
- .map = dragonfly_1_2_map,
- },
{ 0 } /* terminator */
};
diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
index fe91184ce832..0ce888dceed0 100644
--- a/sound/usb/mixer_quirks.c
+++ b/sound/usb/mixer_quirks.c
@@ -37,6 +37,7 @@
#include <sound/control.h>
#include <sound/hwdep.h>
#include <sound/info.h>
+#include <sound/tlv.h>
#include "usbaudio.h"
#include "mixer.h"
@@ -1825,3 +1826,39 @@ void snd_usb_mixer_rc_memory_change(struct usb_mixer_interface *mixer,
}
}
+static void snd_dragonfly_quirk_db_scale(struct usb_mixer_interface *mixer,
+ struct snd_kcontrol *kctl)
+{
+ /* Approximation using 10 ranges based on output measurement on hw v1.2.
+ * This seems close to the cubic mapping e.g. alsamixer uses. */
+ static const DECLARE_TLV_DB_RANGE(scale,
+ 0, 1, TLV_DB_MINMAX_ITEM(-5300, -4970),
+ 2, 5, TLV_DB_MINMAX_ITEM(-4710, -4160),
+ 6, 7, TLV_DB_MINMAX_ITEM(-3884, -3710),
+ 8, 14, TLV_DB_MINMAX_ITEM(-3443, -2560),
+ 15, 16, TLV_DB_MINMAX_ITEM(-2475, -2324),
+ 17, 19, TLV_DB_MINMAX_ITEM(-2228, -2031),
+ 20, 26, TLV_DB_MINMAX_ITEM(-1910, -1393),
+ 27, 31, TLV_DB_MINMAX_ITEM(-1322, -1032),
+ 32, 40, TLV_DB_MINMAX_ITEM(-968, -490),
+ 41, 50, TLV_DB_MINMAX_ITEM(-441, 0),
+ );
+
+ usb_audio_info(mixer->chip, "applying DragonFly dB scale quirk\n");
+ kctl->tlv.p = scale;
+ kctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ;
+ kctl->vd[0].access &= ~SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK;
+}
+
+void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer,
+ struct usb_mixer_elem_info *cval, int unitid,
+ struct snd_kcontrol *kctl)
+{
+ switch (mixer->chip->usb_id) {
+ case USB_ID(0x21b4, 0x0081): /* AudioQuest DragonFly */
+ if (unitid == 7 && cval->min == 0 && cval->max == 50)
+ snd_dragonfly_quirk_db_scale(mixer, kctl);
+ break;
+ }
+}
+
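The DB_RANGE table above hands userspace piecewise dB limits; alsa-lib interpolates linearly inside each range. A rough standalone check of what a mid-range control value maps to (TLV values are in 0.01 dB units):

#include <stdio.h>

int main(void)
{
	/* range 32..40 of the quirk table: -9.68 .. -4.90 dB */
	int vmin = 32, vmax = 40, dbmin = -968, dbmax = -490;
	int val = 36;
	int db = dbmin + (val - vmin) * (dbmax - dbmin) / (vmax - vmin);

	printf("value %d -> %.2f dB\n", val, db / 100.0);	/* -7.29 dB */
	return 0;
}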
diff --git a/sound/usb/mixer_quirks.h b/sound/usb/mixer_quirks.h
index bdbfab093816..177c329cd4dd 100644
--- a/sound/usb/mixer_quirks.h
+++ b/sound/usb/mixer_quirks.h
@@ -9,5 +9,9 @@ void snd_emuusb_set_samplerate(struct snd_usb_audio *chip,
void snd_usb_mixer_rc_memory_change(struct usb_mixer_interface *mixer,
int unitid);
+void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer,
+ struct usb_mixer_elem_info *cval, int unitid,
+ struct snd_kcontrol *kctl);
+
#endif /* SND_USB_MIXER_QUIRKS_H */
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 1a1e2e4df35e..c60a776e815d 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -2829,6 +2829,17 @@ YAMAHA_DEVICE(0x7010, "UB99"),
.idProduct = 0x1020,
},
+/* QinHeng devices */
+{
+ USB_DEVICE(0x1a86, 0x752d),
+ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+ .vendor_name = "QinHeng",
+ .product_name = "CH345",
+ .ifnum = 1,
+ .type = QUIRK_MIDI_CH345
+ }
+},
+
/* KeithMcMillen Stringport */
{
USB_DEVICE(0x1f38, 0x0001),
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 5ca80e7d30cd..b6c0c8e3b450 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -538,6 +538,7 @@ int snd_usb_create_quirk(struct snd_usb_audio *chip,
[QUIRK_MIDI_CME] = create_any_midi_quirk,
[QUIRK_MIDI_AKAI] = create_any_midi_quirk,
[QUIRK_MIDI_FTDI] = create_any_midi_quirk,
+ [QUIRK_MIDI_CH345] = create_any_midi_quirk,
[QUIRK_AUDIO_STANDARD_INTERFACE] = create_standard_audio_quirk,
[QUIRK_AUDIO_FIXED_ENDPOINT] = create_fixed_stream_quirk,
[QUIRK_AUDIO_EDIROL_UAXX] = create_uaxx_quirk,
@@ -1124,6 +1125,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */
case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
+ case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */
return true;
}
return false;
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index 15a12715bd05..b665d85555cb 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -95,6 +95,7 @@ enum quirk_type {
QUIRK_MIDI_AKAI,
QUIRK_MIDI_US122L,
QUIRK_MIDI_FTDI,
+ QUIRK_MIDI_CH345,
QUIRK_AUDIO_STANDARD_INTERFACE,
QUIRK_AUDIO_FIXED_ENDPOINT,
QUIRK_AUDIO_EDIROL_UAXX,
diff --git a/tools/Makefile b/tools/Makefile
index d6f307dfb1a3..0ba0df3b516f 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -32,6 +32,10 @@ help:
@echo ' from the kernel command line to build and install one of'
@echo ' the tools above'
@echo ''
+ @echo ' $$ make tools/all'
+ @echo ''
+ @echo ' builds all tools.'
+ @echo ''
@echo ' $$ make tools/install'
@echo ''
@echo ' installs all tools.'
@@ -77,6 +81,11 @@ tmon: FORCE
freefall: FORCE
$(call descend,laptop/$@)
+all: acpi cgroup cpupower hv firewire lguest \
+ perf selftests turbostat usb \
+ virtio vm net x86_energy_perf_policy \
+ tmon freefall
+
acpi_install:
$(call descend,power/$(@:_install=),install)
@@ -87,7 +96,7 @@ cgroup_install firewire_install hv_install lguest_install perf_install usb_insta
$(call descend,$(@:_install=),install)
selftests_install:
- $(call descend,testing/$(@:_clean=),install)
+ $(call descend,testing/$(@:_install=),install)
turbostat_install x86_energy_perf_policy_install:
$(call descend,power/x86/$(@:_install=),install)
@@ -101,7 +110,7 @@ freefall_install:
install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \
perf_install selftests_install turbostat_install usb_install \
virtio_install vm_install net_install x86_energy_perf_policy_install \
- tmon freefall_install
+ tmon_install freefall_install
acpi_clean:
$(call descend,power/acpi,clean)
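Two of the tools/Makefile hunks above fix copy-paste substitutions: in selftests_install, $(@:_clean=) left the target name untouched (there is no _clean suffix to strip), so make descended into the nonexistent testing/selftests_install directory, whereas $(@:_install=) yields testing/selftests; similarly, the install target listed plain tmon instead of tmon_install, so "make install" rebuilt tmon but never installed it.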
diff --git a/tools/build/Makefile b/tools/build/Makefile
index a93036272d43..0d5a0e3a8fa9 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -25,7 +25,7 @@ export Q srctree CC LD
MAKEFLAGS := --no-print-directory
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
-all: fixdep
+all: $(OUTPUT)fixdep
clean:
$(call QUIET_CLEAN, fixdep)
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 37ff4c9f92f1..02db3cdff20f 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -7,7 +7,7 @@ endif
feature_check = $(eval $(feature_check_code))
define feature_check_code
- feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
+ feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
endef
feature_set = $(eval $(feature_set_code))
@@ -101,7 +101,6 @@ ifeq ($(feature-all), 1)
#
$(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat)))
else
- $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -i -j -C $(feature_dir) $(addsuffix .bin,$(FEATURE_TESTS)) >/dev/null 2>&1)
$(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat)))
endif
@@ -123,13 +122,31 @@ define feature_print_text_code
MSG = $(shell printf '...%30s: %s' $(1) $(2))
endef
+#
+# Generates a feature value assignment for the given feature name, e.g.:
+#   $(call feature_assign,dwarf) == feature-dwarf=1
+#
+feature_assign = feature-$(1)=$(feature-$(1))
+
FEATURE_DUMP_FILENAME = $(OUTPUT)FEATURE-DUMP$(FEATURE_USER)
-FEATURE_DUMP := $(foreach feat,$(FEATURE_DISPLAY),feature-$(feat)($(feature-$(feat))))
-FEATURE_DUMP_FILE := $(shell touch $(FEATURE_DUMP_FILENAME); cat $(FEATURE_DUMP_FILENAME))
+FEATURE_DUMP := $(shell touch $(FEATURE_DUMP_FILENAME); cat $(FEATURE_DUMP_FILENAME))
-ifeq ($(dwarf-post-unwind),1)
- FEATURE_DUMP += dwarf-post-unwind($(dwarf-post-unwind-text))
-endif
+feature_dump_check = $(eval $(feature_dump_check_code))
+define feature_dump_check_code
+ ifeq ($(findstring $(1),$(FEATURE_DUMP)),)
+ $(2) := 1
+ endif
+endef
+
+#
+# First check if any test from FEATURE_DISPLAY is not listed in the
+# FEATURE_DUMP file, and set feature_display := 1 if so.
+$(foreach feat,$(FEATURE_DISPLAY),$(call feature_dump_check,$(call feature_assign,$(feat)),feature_display))
+
+#
+# Now also check if any other test result changed, in which case we
+# force regeneration of the FEATURE-DUMP file.
+$(foreach feat,$(FEATURE_TESTS),$(call feature_dump_check,$(call feature_assign,$(feat)),feature_dump_changed))
# The $(feature_display) controls the default detection message
# output. It's set if:
@@ -138,13 +155,13 @@ endif
# - one of the $(FEATURE_DISPLAY) is not detected
# - VF is enabled
-ifneq ("$(FEATURE_DUMP)","$(FEATURE_DUMP_FILE)")
- $(shell echo "$(FEATURE_DUMP)" > $(FEATURE_DUMP_FILENAME))
- feature_display := 1
+ifeq ($(feature_dump_changed),1)
+ $(shell rm -f $(FEATURE_DUMP_FILENAME))
+ $(foreach feat,$(FEATURE_TESTS),$(shell echo "$(call feature_assign,$(feat))" >> $(FEATURE_DUMP_FILENAME)))
endif
feature_display_check = $(eval $(feature_check_display_code))
-define feature_display_check_code
+define feature_check_display_code
ifneq ($(feature-$(1)), 1)
feature_display := 1
endif
@@ -161,11 +178,6 @@ ifeq ($(feature_display),1)
$(info )
$(info Auto-detecting system features:)
$(foreach feat,$(FEATURE_DISPLAY),$(call feature_print_status,$(feat),))
-
- ifeq ($(dwarf-post-unwind),1)
- $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text))
- endif
-
ifneq ($(feature_verbose),1)
$(info )
endif
diff --git a/tools/build/Makefile.include b/tools/build/Makefile.include
index 4e09ad617a60..be630bed66d2 100644
--- a/tools/build/Makefile.include
+++ b/tools/build/Makefile.include
@@ -4,7 +4,7 @@ ifdef CROSS_COMPILE
fixdep:
else
fixdep:
- $(Q)$(MAKE) -C $(srctree)/tools/build fixdep
+ $(Q)$(MAKE) -C $(srctree)/tools/build CFLAGS= LDFLAGS= $(OUTPUT)fixdep
endif
.PHONY: fixdep
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index cea04ce9f35c..bf8f0352264d 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -1,4 +1,3 @@
-
FILES= \
test-all.bin \
test-backtrace.bin \
@@ -38,38 +37,40 @@ FILES= \
test-bpf.bin \
test-get_cpuid.bin
+FILES := $(addprefix $(OUTPUT),$(FILES))
+
CC := $(CROSS_COMPILE)gcc -MD
PKG_CONFIG := $(CROSS_COMPILE)pkg-config
all: $(FILES)
-__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS)
- BUILD = $(__BUILD) > $(OUTPUT)$(@:.bin=.make.output) 2>&1
+__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
+ BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
###############################
-test-all.bin:
+$(OUTPUT)test-all.bin:
$(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma
-test-hello.bin:
+$(OUTPUT)test-hello.bin:
$(BUILD)
-test-pthread-attr-setaffinity-np.bin:
+$(OUTPUT)test-pthread-attr-setaffinity-np.bin:
$(BUILD) -D_GNU_SOURCE -lpthread
-test-stackprotector-all.bin:
+$(OUTPUT)test-stackprotector-all.bin:
$(BUILD) -fstack-protector-all
-test-fortify-source.bin:
+$(OUTPUT)test-fortify-source.bin:
$(BUILD) -O2 -D_FORTIFY_SOURCE=2
-test-bionic.bin:
+$(OUTPUT)test-bionic.bin:
$(BUILD)
-test-libelf.bin:
+$(OUTPUT)test-libelf.bin:
$(BUILD) -lelf
-test-glibc.bin:
+$(OUTPUT)test-glibc.bin:
$(BUILD)
DWARFLIBS := -ldw
@@ -77,37 +78,37 @@ ifeq ($(findstring -static,${LDFLAGS}),-static)
DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
endif
-test-dwarf.bin:
+$(OUTPUT)test-dwarf.bin:
$(BUILD) $(DWARFLIBS)
-test-libelf-mmap.bin:
+$(OUTPUT)test-libelf-mmap.bin:
$(BUILD) -lelf
-test-libelf-getphdrnum.bin:
+$(OUTPUT)test-libelf-getphdrnum.bin:
$(BUILD) -lelf
-test-libnuma.bin:
+$(OUTPUT)test-libnuma.bin:
$(BUILD) -lnuma
-test-numa_num_possible_cpus.bin:
+$(OUTPUT)test-numa_num_possible_cpus.bin:
$(BUILD) -lnuma
-test-libunwind.bin:
+$(OUTPUT)test-libunwind.bin:
$(BUILD) -lelf
-test-libunwind-debug-frame.bin:
+$(OUTPUT)test-libunwind-debug-frame.bin:
$(BUILD) -lelf
-test-libaudit.bin:
+$(OUTPUT)test-libaudit.bin:
$(BUILD) -laudit
-test-libslang.bin:
+$(OUTPUT)test-libslang.bin:
$(BUILD) -I/usr/include/slang -lslang
-test-gtk2.bin:
+$(OUTPUT)test-gtk2.bin:
$(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
-test-gtk2-infobar.bin:
+$(OUTPUT)test-gtk2-infobar.bin:
$(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
grep-libs = $(filter -l%,$(1))
@@ -119,63 +120,63 @@ PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
-test-libperl.bin:
+$(OUTPUT)test-libperl.bin:
$(BUILD) $(FLAGS_PERL_EMBED)
-test-libpython.bin:
+$(OUTPUT)test-libpython.bin:
$(BUILD)
-test-libpython-version.bin:
+$(OUTPUT)test-libpython-version.bin:
$(BUILD)
-test-libbfd.bin:
+$(OUTPUT)test-libbfd.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl
-test-liberty.bin:
- $(CC) $(CFLAGS) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty
+$(OUTPUT)test-liberty.bin:
+ $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty
-test-liberty-z.bin:
- $(CC) $(CFLAGS) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty -lz
+$(OUTPUT)test-liberty-z.bin:
+ $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty -lz
-test-cplus-demangle.bin:
+$(OUTPUT)test-cplus-demangle.bin:
$(BUILD) -liberty
-test-backtrace.bin:
+$(OUTPUT)test-backtrace.bin:
$(BUILD)
-test-timerfd.bin:
+$(OUTPUT)test-timerfd.bin:
$(BUILD)
-test-libdw-dwarf-unwind.bin:
+$(OUTPUT)test-libdw-dwarf-unwind.bin:
$(BUILD) # -ldw provided by $(FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind)
-test-libbabeltrace.bin:
+$(OUTPUT)test-libbabeltrace.bin:
$(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace)
-test-sync-compare-and-swap.bin:
+$(OUTPUT)test-sync-compare-and-swap.bin:
$(BUILD)
-test-compile-32.bin:
- $(CC) -m32 -o $(OUTPUT)$@ test-compile.c
+$(OUTPUT)test-compile-32.bin:
+ $(CC) -m32 -o $@ test-compile.c
-test-compile-x32.bin:
- $(CC) -mx32 -o $(OUTPUT)$@ test-compile.c
+$(OUTPUT)test-compile-x32.bin:
+ $(CC) -mx32 -o $@ test-compile.c
-test-zlib.bin:
+$(OUTPUT)test-zlib.bin:
$(BUILD) -lz
-test-lzma.bin:
+$(OUTPUT)test-lzma.bin:
$(BUILD) -llzma
-test-get_cpuid.bin:
+$(OUTPUT)test-get_cpuid.bin:
$(BUILD)
-test-bpf.bin:
+$(OUTPUT)test-bpf.bin:
$(BUILD)
--include *.d
+-include $(OUTPUT)*.d
###############################
clean:
- rm -f $(FILES) *.d $(FILES:.bin=.make.output)
+ rm -f $(FILES) $(OUTPUT)*.d $(FILES:.bin=.make.output)
diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index 40bd21488032..28f5493da491 100644
--- a/tools/perf/util/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -11,6 +11,8 @@ int __bitmap_weight(const unsigned long *bitmap, int bits);
void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, int bits);
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
+
#define BITMAP_LAST_WORD_MASK(nbits) \
( \
((nbits) % BITS_PER_LONG) ? \
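The new BITMAP_FIRST_WORD_MASK() complements BITMAP_LAST_WORD_MASK(): it masks off the bits below 'start' within start's word. A worked illustration (standalone sketch, assuming 64-bit longs):

	#include <stdio.h>

	#define BITS_PER_LONG 64
	#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))

	int main(void)
	{
		/* bit 68 is offset 4 within word 1, so bits 0-3 are masked off */
		printf("%#lx\n", BITMAP_FIRST_WORD_MASK(68)); /* 0xfffffffffffffff0 */
		return 0;
	}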
diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
new file mode 100644
index 000000000000..e26223f1f287
--- /dev/null
+++ b/tools/include/linux/string.h
@@ -0,0 +1,15 @@
+#ifndef _TOOLS_LINUX_STRING_H_
+#define _TOOLS_LINUX_STRING_H_
+
+
+#include <linux/types.h> /* for size_t */
+
+void *memdup(const void *src, size_t len);
+
+int strtobool(const char *s, bool *res);
+
+#ifndef __UCLIBC__
+extern size_t strlcpy(char *dest, const char *src, size_t size);
+#endif
+
+#endif /* _LINUX_STRING_H_ */
diff --git a/tools/perf/util/bitmap.c b/tools/lib/bitmap.c
index 0a1adc1111fd..0a1adc1111fd 100644
--- a/tools/perf/util/bitmap.c
+++ b/tools/lib/bitmap.c
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index a3caaf3eafbd..919b71780710 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -71,7 +71,21 @@ FEATURE_DISPLAY = libelf bpf
INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi
FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
+check_feat := 1
+NON_CHECK_FEAT_TARGETS := clean TAGS tags cscope help
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
+ check_feat := 0
+endif
+endif
+
+ifeq ($(check_feat),1)
+ifeq ($(FEATURES_DUMP),)
include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
+endif
export prefix libdir src obj
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index a6331050ab79..5bdc6eab6852 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -83,3 +83,17 @@ int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns,
log_buf[0] = 0;
return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
+
+int bpf_map_update_elem(int fd, void *key, void *value,
+ u64 flags)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.value = ptr_to_u64(value);
+ attr.flags = flags;
+
+ return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+}
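The new wrapper drives the BPF_MAP_UPDATE_ELEM syscall command. A minimal usage sketch (illustration only, not part of the patch; the map shape and value are arbitrary, and bpf_create_map() is the existing wrapper from this library):

	#include <stdio.h>
	#include <linux/bpf.h>	/* BPF_MAP_TYPE_ARRAY, BPF_ANY */
	#include "bpf.h"	/* bpf_create_map(), bpf_map_update_elem() */

	int main(void)
	{
		int key = 0;
		unsigned long long value = 42;
		/* 4-byte keys, 8-byte values, one entry */
		int fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key),
					sizeof(value), 1);

		if (fd < 0) {
			perror("bpf_create_map");
			return 1;
		}
		/* BPF_ANY: create the element, or update it if it exists */
		if (bpf_map_update_elem(fd, &key, &value, BPF_ANY))
			perror("bpf_map_update_elem");
		return 0;
	}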
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 854b7361b784..a76465541292 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -20,4 +20,6 @@ int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns,
u32 kern_version, char *log_buf,
size_t log_buf_sz);
+int bpf_map_update_elem(int fd, void *key, void *value,
+ u64 flags);
#endif
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e176bad19bcb..8334a5a9d5d7 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -152,29 +152,36 @@ struct bpf_program {
} *reloc_desc;
int nr_reloc;
- int fd;
+ struct {
+ int nr;
+ int *fds;
+ } instances;
+ bpf_program_prep_t preprocessor;
struct bpf_object *obj;
void *priv;
bpf_program_clear_priv_t clear_priv;
};
+struct bpf_map {
+ int fd;
+ char *name;
+ struct bpf_map_def def;
+ void *priv;
+ bpf_map_clear_priv_t clear_priv;
+};
+
static LIST_HEAD(bpf_objects_list);
struct bpf_object {
char license[64];
u32 kern_version;
- void *maps_buf;
- size_t maps_buf_sz;
struct bpf_program *programs;
size_t nr_programs;
- int *map_fds;
- /*
- * This field is required because maps_buf will be freed and
- * maps_buf_sz will be set to 0 after loaded.
- */
- size_t nr_map_fds;
+ struct bpf_map *maps;
+ size_t nr_maps;
+
bool loaded;
/*
@@ -188,6 +195,7 @@ struct bpf_object {
Elf *elf;
GElf_Ehdr ehdr;
Elf_Data *symbols;
+ size_t strtabidx;
struct {
GElf_Shdr shdr;
Elf_Data *data;
@@ -206,10 +214,25 @@ struct bpf_object {
static void bpf_program__unload(struct bpf_program *prog)
{
+ int i;
+
if (!prog)
return;
- zclose(prog->fd);
+ /*
+ * If the object is opened but the program was never loaded,
+ * it is possible that prog->instances.nr == -1.
+ */
+ if (prog->instances.nr > 0) {
+ for (i = 0; i < prog->instances.nr; i++)
+ zclose(prog->instances.fds[i]);
+ } else if (prog->instances.nr != -1) {
+ pr_warning("Internal error: instances.nr is %d\n",
+ prog->instances.nr);
+ }
+
+ prog->instances.nr = -1;
+ zfree(&prog->instances.fds);
}
static void bpf_program__exit(struct bpf_program *prog)
@@ -260,7 +283,8 @@ bpf_program__init(void *data, size_t size, char *name, int idx,
memcpy(prog->insns, data,
prog->insns_cnt * sizeof(struct bpf_insn));
prog->idx = idx;
- prog->fd = -1;
+ prog->instances.fds = NULL;
+ prog->instances.nr = -1;
return 0;
errout:
@@ -469,21 +493,77 @@ static int
bpf_object__init_maps(struct bpf_object *obj, void *data,
size_t size)
{
- if (size == 0) {
+ size_t nr_maps;
+ int i;
+
+ nr_maps = size / sizeof(struct bpf_map_def);
+ if (!data || !nr_maps) {
pr_debug("%s doesn't need map definition\n",
obj->path);
return 0;
}
- obj->maps_buf = malloc(size);
- if (!obj->maps_buf) {
- pr_warning("malloc maps failed: %s\n", obj->path);
+ pr_debug("maps in %s: %zd bytes\n", obj->path, size);
+
+ obj->maps = calloc(nr_maps, sizeof(obj->maps[0]));
+ if (!obj->maps) {
+ pr_warning("alloc maps for object failed\n");
return -ENOMEM;
}
+ obj->nr_maps = nr_maps;
- obj->maps_buf_sz = size;
- memcpy(obj->maps_buf, data, size);
- pr_debug("maps in %s: %ld bytes\n", obj->path, (long)size);
+ for (i = 0; i < nr_maps; i++) {
+ struct bpf_map_def *def = &obj->maps[i].def;
+
+ /*
+ * Fill all fds with -1 so we won't accidentally close a valid
+ * fd (fd=0 is stdin) on failure (zclose won't close a
+ * negative fd).
+ */
+ obj->maps[i].fd = -1;
+
+ /* Save map definition into obj->maps */
+ *def = ((struct bpf_map_def *)data)[i];
+ }
+ return 0;
+}
+
+static int
+bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx)
+{
+ int i;
+ Elf_Data *symbols = obj->efile.symbols;
+
+ if (!symbols || maps_shndx < 0)
+ return -EINVAL;
+
+ for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+ GElf_Sym sym;
+ size_t map_idx;
+ const char *map_name;
+
+ if (!gelf_getsym(symbols, i, &sym))
+ continue;
+ if (sym.st_shndx != maps_shndx)
+ continue;
+
+ map_name = elf_strptr(obj->efile.elf,
+ obj->efile.strtabidx,
+ sym.st_name);
+ map_idx = sym.st_value / sizeof(struct bpf_map_def);
+ if (map_idx >= obj->nr_maps) {
+ pr_warning("index of map \"%s\" is buggy: %zu > %zu\n",
+ map_name, map_idx, obj->nr_maps);
+ continue;
+ }
+ obj->maps[map_idx].name = strdup(map_name);
+ if (!obj->maps[map_idx].name) {
+ pr_warning("failed to alloc map name\n");
+ return -ENOMEM;
+ }
+ pr_debug("map %zu is \"%s\"\n", map_idx,
+ obj->maps[map_idx].name);
+ }
return 0;
}
@@ -492,7 +572,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
Elf *elf = obj->efile.elf;
GElf_Ehdr *ep = &obj->efile.ehdr;
Elf_Scn *scn = NULL;
- int idx = 0, err = 0;
+ int idx = 0, err = 0, maps_shndx = -1;
/* Elf is corrupted/truncated, avoid calling elf_strptr. */
if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
@@ -542,16 +622,19 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
err = bpf_object__init_kversion(obj,
data->d_buf,
data->d_size);
- else if (strcmp(name, "maps") == 0)
+ else if (strcmp(name, "maps") == 0) {
err = bpf_object__init_maps(obj, data->d_buf,
data->d_size);
- else if (sh.sh_type == SHT_SYMTAB) {
+ maps_shndx = idx;
+ } else if (sh.sh_type == SHT_SYMTAB) {
if (obj->efile.symbols) {
pr_warning("bpf: multiple SYMTAB in %s\n",
obj->path);
err = -LIBBPF_ERRNO__FORMAT;
- } else
+ } else {
obj->efile.symbols = data;
+ obj->efile.strtabidx = sh.sh_link;
+ }
} else if ((sh.sh_type == SHT_PROGBITS) &&
(sh.sh_flags & SHF_EXECINSTR) &&
(data->d_size > 0)) {
@@ -586,6 +669,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
if (err)
goto out;
}
+
+ if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
+ pr_warning("Corrupted ELF file: index of strtab invalid\n");
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ if (maps_shndx >= 0)
+ err = bpf_object__init_maps_name(obj, maps_shndx);
out:
return err;
}
@@ -668,37 +758,15 @@ static int
bpf_object__create_maps(struct bpf_object *obj)
{
unsigned int i;
- size_t nr_maps;
- int *pfd;
-
- nr_maps = obj->maps_buf_sz / sizeof(struct bpf_map_def);
- if (!obj->maps_buf || !nr_maps) {
- pr_debug("don't need create maps for %s\n",
- obj->path);
- return 0;
- }
-
- obj->map_fds = malloc(sizeof(int) * nr_maps);
- if (!obj->map_fds) {
- pr_warning("realloc perf_bpf_map_fds failed\n");
- return -ENOMEM;
- }
- obj->nr_map_fds = nr_maps;
- /* fill all fd with -1 */
- memset(obj->map_fds, -1, sizeof(int) * nr_maps);
-
- pfd = obj->map_fds;
- for (i = 0; i < nr_maps; i++) {
- struct bpf_map_def def;
+ for (i = 0; i < obj->nr_maps; i++) {
+ struct bpf_map_def *def = &obj->maps[i].def;
+ int *pfd = &obj->maps[i].fd;
- def = *(struct bpf_map_def *)(obj->maps_buf +
- i * sizeof(struct bpf_map_def));
-
- *pfd = bpf_create_map(def.type,
- def.key_size,
- def.value_size,
- def.max_entries);
+ *pfd = bpf_create_map(def->type,
+ def->key_size,
+ def->value_size,
+ def->max_entries);
if (*pfd < 0) {
size_t j;
int err = *pfd;
@@ -706,22 +774,17 @@ bpf_object__create_maps(struct bpf_object *obj)
pr_warning("failed to create map: %s\n",
strerror(errno));
for (j = 0; j < i; j++)
- zclose(obj->map_fds[j]);
- obj->nr_map_fds = 0;
- zfree(&obj->map_fds);
+ zclose(obj->maps[j].fd);
return err;
}
pr_debug("create map: fd=%d\n", *pfd);
- pfd++;
}
- zfree(&obj->maps_buf);
- obj->maps_buf_sz = 0;
return 0;
}
static int
-bpf_program__relocate(struct bpf_program *prog, int *map_fds)
+bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
{
int i;
@@ -741,7 +804,7 @@ bpf_program__relocate(struct bpf_program *prog, int *map_fds)
return -LIBBPF_ERRNO__RELOC;
}
insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
- insns[insn_idx].imm = map_fds[map_idx];
+ insns[insn_idx].imm = obj->maps[map_idx].fd;
}
zfree(&prog->reloc_desc);
@@ -760,7 +823,7 @@ bpf_object__relocate(struct bpf_object *obj)
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- err = bpf_program__relocate(prog, obj->map_fds);
+ err = bpf_program__relocate(prog, obj);
if (err) {
pr_warning("failed to relocate '%s'\n",
prog->section_name);
@@ -784,8 +847,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
Elf_Data *data = obj->efile.reloc[i].data;
int idx = shdr->sh_info;
struct bpf_program *prog;
- size_t nr_maps = obj->maps_buf_sz /
- sizeof(struct bpf_map_def);
+ size_t nr_maps = obj->nr_maps;
if (shdr->sh_type != SHT_REL) {
pr_warning("internal error at %d\n", __LINE__);
@@ -860,13 +922,73 @@ static int
bpf_program__load(struct bpf_program *prog,
char *license, u32 kern_version)
{
- int err, fd;
+ int err = 0, fd, i;
- err = load_program(prog->insns, prog->insns_cnt,
- license, kern_version, &fd);
- if (!err)
- prog->fd = fd;
+ if (prog->instances.nr < 0 || !prog->instances.fds) {
+ if (prog->preprocessor) {
+ pr_warning("Internal error: can't load program '%s'\n",
+ prog->section_name);
+ return -LIBBPF_ERRNO__INTERNAL;
+ }
+
+ prog->instances.fds = malloc(sizeof(int));
+ if (!prog->instances.fds) {
+ pr_warning("Not enough memory for BPF fds\n");
+ return -ENOMEM;
+ }
+ prog->instances.nr = 1;
+ prog->instances.fds[0] = -1;
+ }
+
+ if (!prog->preprocessor) {
+ if (prog->instances.nr != 1) {
+ pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
+ prog->section_name, prog->instances.nr);
+ }
+ err = load_program(prog->insns, prog->insns_cnt,
+ license, kern_version, &fd);
+ if (!err)
+ prog->instances.fds[0] = fd;
+ goto out;
+ }
+
+ for (i = 0; i < prog->instances.nr; i++) {
+ struct bpf_prog_prep_result result;
+ bpf_program_prep_t preprocessor = prog->preprocessor;
+
+ bzero(&result, sizeof(result));
+ err = preprocessor(prog, i, prog->insns,
+ prog->insns_cnt, &result);
+ if (err) {
+ pr_warning("Preprocessing the %dth instance of program '%s' failed\n",
+ i, prog->section_name);
+ goto out;
+ }
+
+ if (!result.new_insn_ptr || !result.new_insn_cnt) {
+ pr_debug("Skip loading the %dth instance of program '%s'\n",
+ i, prog->section_name);
+ prog->instances.fds[i] = -1;
+ if (result.pfd)
+ *result.pfd = -1;
+ continue;
+ }
+
+ err = load_program(result.new_insn_ptr,
+ result.new_insn_cnt,
+ license, kern_version, &fd);
+
+ if (err) {
+ pr_warning("Loading the %dth instance of program '%s' failed\n",
+ i, prog->section_name);
+ goto out;
+ }
+ if (result.pfd)
+ *result.pfd = fd;
+ prog->instances.fds[i] = fd;
+ }
+out:
if (err)
pr_warning("failed to load program '%s'\n",
prog->section_name);
@@ -970,10 +1092,8 @@ int bpf_object__unload(struct bpf_object *obj)
if (!obj)
return -EINVAL;
- for (i = 0; i < obj->nr_map_fds; i++)
- zclose(obj->map_fds[i]);
- zfree(&obj->map_fds);
- obj->nr_map_fds = 0;
+ for (i = 0; i < obj->nr_maps; i++)
+ zclose(obj->maps[i].fd);
for (i = 0; i < obj->nr_programs; i++)
bpf_program__unload(&obj->programs[i]);
@@ -1016,7 +1136,16 @@ void bpf_object__close(struct bpf_object *obj)
bpf_object__elf_finish(obj);
bpf_object__unload(obj);
- zfree(&obj->maps_buf);
+ for (i = 0; i < obj->nr_maps; i++) {
+ zfree(&obj->maps[i].name);
+ if (obj->maps[i].clear_priv)
+ obj->maps[i].clear_priv(&obj->maps[i],
+ obj->maps[i].priv);
+ obj->maps[i].priv = NULL;
+ obj->maps[i].clear_priv = NULL;
+ }
+ zfree(&obj->maps);
+ obj->nr_maps = 0;
if (obj->programs && obj->nr_programs) {
for (i = 0; i < obj->nr_programs; i++)
@@ -1121,5 +1250,142 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
int bpf_program__fd(struct bpf_program *prog)
{
- return prog->fd;
+ return bpf_program__nth_fd(prog, 0);
+}
+
+int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
+ bpf_program_prep_t prep)
+{
+ int *instances_fds;
+
+ if (nr_instances <= 0 || !prep)
+ return -EINVAL;
+
+ if (prog->instances.nr > 0 || prog->instances.fds) {
+ pr_warning("Can't set pre-processor after loading\n");
+ return -EINVAL;
+ }
+
+ instances_fds = malloc(sizeof(int) * nr_instances);
+ if (!instances_fds) {
+ pr_warning("alloc memory failed for fds\n");
+ return -ENOMEM;
+ }
+
+ /* fill all fd with -1 */
+ memset(instances_fds, -1, sizeof(int) * nr_instances);
+
+ prog->instances.nr = nr_instances;
+ prog->instances.fds = instances_fds;
+ prog->preprocessor = prep;
+ return 0;
+}
+
+int bpf_program__nth_fd(struct bpf_program *prog, int n)
+{
+ int fd;
+
+ if (n >= prog->instances.nr || n < 0) {
+ pr_warning("Can't get the %dth fd from program %s: only %d instances\n",
+ n, prog->section_name, prog->instances.nr);
+ return -EINVAL;
+ }
+
+ fd = prog->instances.fds[n];
+ if (fd < 0) {
+ pr_warning("%dth instance of program '%s' is invalid\n",
+ n, prog->section_name);
+ return -ENOENT;
+ }
+
+ return fd;
+}
+
+int bpf_map__get_fd(struct bpf_map *map)
+{
+ if (!map)
+ return -EINVAL;
+
+ return map->fd;
+}
+
+int bpf_map__get_def(struct bpf_map *map, struct bpf_map_def *pdef)
+{
+ if (!map || !pdef)
+ return -EINVAL;
+
+ *pdef = map->def;
+ return 0;
+}
+
+const char *bpf_map__get_name(struct bpf_map *map)
+{
+ if (!map)
+ return NULL;
+ return map->name;
+}
+
+int bpf_map__set_private(struct bpf_map *map, void *priv,
+ bpf_map_clear_priv_t clear_priv)
+{
+ if (!map)
+ return -EINVAL;
+
+ if (map->priv) {
+ if (map->clear_priv)
+ map->clear_priv(map, map->priv);
+ }
+
+ map->priv = priv;
+ map->clear_priv = clear_priv;
+ return 0;
+}
+
+int bpf_map__get_private(struct bpf_map *map, void **ppriv)
+{
+ if (!map)
+ return -EINVAL;
+
+ if (ppriv)
+ *ppriv = map->priv;
+ return 0;
+}
+
+struct bpf_map *
+bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
+{
+ size_t idx;
+ struct bpf_map *s, *e;
+
+ if (!obj || !obj->maps)
+ return NULL;
+
+ s = obj->maps;
+ e = obj->maps + obj->nr_maps;
+
+ if (prev == NULL)
+ return s;
+
+ if ((prev < s) || (prev >= e)) {
+ pr_warning("error in %s: map handler doesn't belong to object\n",
+ __func__);
+ return NULL;
+ }
+
+ idx = (prev - obj->maps) + 1;
+ if (idx >= obj->nr_maps)
+ return NULL;
+ return &obj->maps[idx];
+}
+
+struct bpf_map *
+bpf_object__get_map_by_name(struct bpf_object *obj, const char *name)
+{
+ struct bpf_map *pos;
+
+ bpf_map__for_each(pos, obj) {
+ if (pos->name && !strcmp(pos->name, name))
+ return pos;
+ }
+ return NULL;
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index c9a9aef2806c..a51594c7b518 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -88,6 +88,70 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy);
int bpf_program__fd(struct bpf_program *prog);
+struct bpf_insn;
+
+/*
+ * Libbpf allows callers to adjust BPF programs before they are loaded
+ * into the kernel. One program in an object file can be transformed
+ * into multiple variants to be attached to different code.
+ *
+ * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
+ * are the APIs for this purpose.
+ *
+ * - bpf_program_prep_t:
+ *   Defines the 'preprocessor', a caller-defined function passed to
+ *   libbpf through bpf_program__set_prep(), which is called before
+ *   the program is loaded. The preprocessor should adjust the program
+ *   once for each instance, according to the instance index passed
+ *   to it.
+ *
+ * - bpf_program__set_prep:
+ *   Attaches a preprocessor to a BPF program. The number of instances
+ *   that should be created is also passed through this function.
+ *
+ * - bpf_program__nth_fd:
+ *   After the program is loaded, gets the resulting fd for each
+ *   instance from the BPF program.
+ *
+ * If bpf_program__set_prep() is not used, the program is loaded
+ * without adjustment during bpf_object__load(). The program then has
+ * only one instance, in which case bpf_program__fd(prog) is equal to
+ * bpf_program__nth_fd(prog, 0).
+ */
+
+struct bpf_prog_prep_result {
+ /*
+ * If not NULL, load new instruction array.
+ * If set to NULL, don't load this instance.
+ */
+ struct bpf_insn *new_insn_ptr;
+ int new_insn_cnt;
+
+ /* If not NULL, result fd is set to it */
+ int *pfd;
+};
+
+/*
+ * Parameters of bpf_program_prep_t:
+ * - prog: The bpf_program being loaded.
+ * - n: Index of instance being generated.
+ * - insns: BPF instructions array.
+ * - insns_cnt: Number of instructions in insns.
+ * - res: Output parameter, result of transformation.
+ *
+ * Return value:
+ * - Zero: pre-processing succeeded.
+ * - Non-zero: pre-processing failed; loading is stopped.
+ */
+typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
+ struct bpf_insn *insns, int insns_cnt,
+ struct bpf_prog_prep_result *res);
+
+int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
+ bpf_program_prep_t prep);
+
+int bpf_program__nth_fd(struct bpf_program *prog, int n);
+
/*
* We don't need __attribute__((packed)) now since it is
* unnecessary for 'bpf_map_def' because they are all aligned.
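To make the instances mechanism concrete, here is a hedged sketch (not part of the patch) of a preprocessor that stamps the instance index into the first instruction's immediate, producing per-instance variants of one program. The static buffer and the choice of patched field are illustrative only:

	#include <string.h>
	#include <linux/bpf.h>	/* struct bpf_insn, BPF_MAXINSNS */
	#include "libbpf.h"

	static struct bpf_insn insns_buf[BPF_MAXINSNS];

	static int patch_imm(struct bpf_program *prog, int n,
			     struct bpf_insn *insns, int insns_cnt,
			     struct bpf_prog_prep_result *res)
	{
		if (insns_cnt < 1 || insns_cnt > BPF_MAXINSNS)
			return -1;	/* non-zero: stop loading */

		memcpy(insns_buf, insns, sizeof(insns[0]) * insns_cnt);
		insns_buf[0].imm = n;	/* instance-specific constant */

		res->new_insn_ptr = insns_buf;
		res->new_insn_cnt = insns_cnt;
		res->pfd = NULL;	/* fds fetched later via bpf_program__nth_fd() */
		return 0;
	}

	/* Before bpf_object__load(): request four variants of 'prog' */
	static int setup_variants(struct bpf_program *prog)
	{
		return bpf_program__set_prep(prog, 4, patch_imm);
	}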
@@ -101,4 +165,28 @@ struct bpf_map_def {
unsigned int max_entries;
};
+/*
+ * There is another 'struct bpf_map' in include/linux/bpf.h. However,
+ * that is not a uapi header, so there is no need to worry about a
+ * name clash.
+ */
+struct bpf_map;
+struct bpf_map *
+bpf_object__get_map_by_name(struct bpf_object *obj, const char *name);
+
+struct bpf_map *
+bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
+#define bpf_map__for_each(pos, obj) \
+ for ((pos) = bpf_map__next(NULL, (obj)); \
+ (pos) != NULL; \
+ (pos) = bpf_map__next((pos), (obj)))
+
+int bpf_map__get_fd(struct bpf_map *map);
+int bpf_map__get_def(struct bpf_map *map, struct bpf_map_def *pdef);
+const char *bpf_map__get_name(struct bpf_map *map);
+
+typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
+int bpf_map__set_private(struct bpf_map *map, void *priv,
+ bpf_map_clear_priv_t clear_priv);
+int bpf_map__get_private(struct bpf_map *map, void **ppriv);
+
#endif
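A short sketch of walking the new per-object map list through these accessors (map fds are only valid once bpf_object__load() has run):

	#include <stdio.h>
	#include "libbpf.h"

	static void dump_maps(struct bpf_object *obj)
	{
		struct bpf_map *map;

		bpf_map__for_each(map, obj) {
			struct bpf_map_def def;

			if (bpf_map__get_def(map, &def))
				continue;
			printf("map '%s': fd=%d, value_size=%u\n",
			       bpf_map__get_name(map), bpf_map__get_fd(map),
			       def.value_size);
		}
	}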
diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c
new file mode 100644
index 000000000000..9122a9e80046
--- /dev/null
+++ b/tools/lib/find_bit.c
@@ -0,0 +1,84 @@
+/* bit search implementation
+ *
+ * Copied from lib/find_bit.c to tools/lib/find_bit.c
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
+ * size and improve performance, 2015.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/kernel.h>
+
+#if !defined(find_next_bit)
+
+/*
+ * This is a common helper function for find_next_bit and
+ * find_next_zero_bit. The difference is the "invert" argument, which
+ * is XORed with each fetched word before searching it for set bits.
+ */
+static unsigned long _find_next_bit(const unsigned long *addr,
+ unsigned long nbits, unsigned long start, unsigned long invert)
+{
+ unsigned long tmp;
+
+ if (!nbits || start >= nbits)
+ return nbits;
+
+ tmp = addr[start / BITS_PER_LONG] ^ invert;
+
+ /* Handle 1st word. */
+ tmp &= BITMAP_FIRST_WORD_MASK(start);
+ start = round_down(start, BITS_PER_LONG);
+
+ while (!tmp) {
+ start += BITS_PER_LONG;
+ if (start >= nbits)
+ return nbits;
+
+ tmp = addr[start / BITS_PER_LONG] ^ invert;
+ }
+
+ return min(start + __ffs(tmp), nbits);
+}
+#endif
+
+#ifndef find_next_bit
+/*
+ * Find the next set bit in a memory region.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ return _find_next_bit(addr, size, offset, 0UL);
+}
+#endif
+
+#ifndef find_first_bit
+/*
+ * Find the first set bit in a memory region.
+ */
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+{
+ unsigned long idx;
+
+ for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+ if (addr[idx])
+ return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
+ }
+
+ return size;
+}
+#endif
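These helpers support the usual "iterate over set bits" idiom. A standalone sketch (prototypes inlined here instead of pulling in the tools headers; link against tools/lib/find_bit.c):

	#include <stdio.h>

	#define BITS_PER_LONG (8 * sizeof(unsigned long))

	unsigned long find_first_bit(const unsigned long *addr, unsigned long size);
	unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
				    unsigned long offset);

	int main(void)
	{
		unsigned long map[1] = { 0x50UL };	/* bits 4 and 6 set */
		unsigned long bit;

		for (bit = find_first_bit(map, BITS_PER_LONG);
		     bit < BITS_PER_LONG;
		     bit = find_next_bit(map, BITS_PER_LONG, bit + 1))
			printf("bit %lu is set\n", bit);	/* prints 4, then 6 */
		return 0;
	}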
diff --git a/tools/lib/string.c b/tools/lib/string.c
new file mode 100644
index 000000000000..bd239bc1d557
--- /dev/null
+++ b/tools/lib/string.c
@@ -0,0 +1,89 @@
+/*
+ * linux/tools/lib/string.c
+ *
+ * Copied from linux/lib/string.c, where it is:
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * More specifically, the first copied function was strtobool, which
+ * was introduced by:
+ *
+ * d0f1fed29e6e ("Add a strtobool function matching semantics of existing in kernel equivalents")
+ * Author: Jonathan Cameron <jic23@cam.ac.uk>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/string.h>
+#include <linux/compiler.h>
+
+/**
+ * memdup - duplicate region of memory
+ *
+ * @src: memory region to duplicate
+ * @len: memory region length
+ */
+void *memdup(const void *src, size_t len)
+{
+ void *p = malloc(len);
+
+ if (p)
+ memcpy(p, src, len);
+
+ return p;
+}
+
+/**
+ * strtobool - convert common user inputs into boolean values
+ * @s: input string
+ * @res: result
+ *
+ * This routine returns 0 iff the first character is one of 'Yy1Nn0'.
+ * Otherwise it will return -EINVAL. Value pointed to by res is
+ * updated upon finding a match.
+ */
+int strtobool(const char *s, bool *res)
+{
+ switch (s[0]) {
+ case 'y':
+ case 'Y':
+ case '1':
+ *res = true;
+ break;
+ case 'n':
+ case 'N':
+ case '0':
+ *res = false;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * strlcpy - Copy a C-string into a sized buffer
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @size: size of destination buffer
+ *
+ * Compatible with *BSD: the result is always a valid
+ * NUL-terminated string that fits in the buffer (unless,
+ * of course, the buffer size is zero). It does not pad
+ * out the result like strncpy() does.
+ *
+ * If libc has strlcpy() then that version will override this
+ * implementation:
+ */
+size_t __weak strlcpy(char *dest, const char *src, size_t size)
+{
+ size_t ret = strlen(src);
+
+ if (size) {
+ size_t len = (ret >= size) ? size - 1 : ret;
+ memcpy(dest, src, len);
+ dest[len] = '\0';
+ }
+ return ret;
+}
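A quick demonstration of the two string helpers (illustration only; link against tools/lib/string.c and build with -Itools/include so <linux/string.h> resolves to the header added above):

	#include <stdio.h>
	#include <stdbool.h>
	#include <linux/string.h>	/* strtobool(), strlcpy() */

	int main(void)
	{
		bool on;
		char buf[4];

		/* strtobool() only inspects the first character */
		if (!strtobool("Yes", &on))
			printf("on=%d\n", on);		/* on=1 */

		/* unlike strncpy(), the result is always NUL-terminated */
		printf("copied %zu, got '%s'\n",
		       strlcpy(buf, "toolbox", sizeof(buf)), buf);	/* 7, 'too' */
		return 0;
	}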
diff --git a/tools/lib/subcmd/Build b/tools/lib/subcmd/Build
new file mode 100644
index 000000000000..ee31288788c1
--- /dev/null
+++ b/tools/lib/subcmd/Build
@@ -0,0 +1,7 @@
+libsubcmd-y += exec-cmd.o
+libsubcmd-y += help.o
+libsubcmd-y += pager.o
+libsubcmd-y += parse-options.o
+libsubcmd-y += run-command.o
+libsubcmd-y += sigchain.o
+libsubcmd-y += subcmd-config.o
diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile
new file mode 100644
index 000000000000..629cf8c14e68
--- /dev/null
+++ b/tools/lib/subcmd/Makefile
@@ -0,0 +1,48 @@
+include ../../scripts/Makefile.include
+include ../../perf/config/utilities.mak # QUIET_CLEAN
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(shell pwd)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+CC = $(CROSS_COMPILE)gcc
+AR = $(CROSS_COMPILE)ar
+RM = rm -f
+
+MAKEFLAGS += --no-print-directory
+
+LIBFILE = $(OUTPUT)libsubcmd.a
+
+CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
+CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
+
+CFLAGS += -I$(srctree)/tools/include/
+CFLAGS += -I$(srctree)/include/uapi
+CFLAGS += -I$(srctree)/include
+
+SUBCMD_IN := $(OUTPUT)libsubcmd-in.o
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+include $(srctree)/tools/build/Makefile.include
+
+all: fixdep $(LIBFILE)
+
+$(SUBCMD_IN): FORCE
+ @$(MAKE) $(build)=libsubcmd
+
+$(LIBFILE): $(SUBCMD_IN)
+ $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SUBCMD_IN)
+
+clean:
+ $(call QUIET_CLEAN, libsubcmd) $(RM) $(LIBFILE); \
+ find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
+
+FORCE:
+
+.PHONY: clean FORCE
diff --git a/tools/lib/subcmd/exec-cmd.c b/tools/lib/subcmd/exec-cmd.c
new file mode 100644
index 000000000000..1ae833af1a4a
--- /dev/null
+++ b/tools/lib/subcmd/exec-cmd.c
@@ -0,0 +1,209 @@
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "subcmd-util.h"
+#include "exec-cmd.h"
+#include "subcmd-config.h"
+
+#define MAX_ARGS 32
+#define PATH_MAX 4096
+
+static const char *argv_exec_path;
+static const char *argv0_path;
+
+void exec_cmd_init(const char *exec_name, const char *prefix,
+ const char *exec_path, const char *exec_path_env)
+{
+ subcmd_config.exec_name = exec_name;
+ subcmd_config.prefix = prefix;
+ subcmd_config.exec_path = exec_path;
+ subcmd_config.exec_path_env = exec_path_env;
+}
+
+#define is_dir_sep(c) ((c) == '/')
+
+static int is_absolute_path(const char *path)
+{
+ return path[0] == '/';
+}
+
+static const char *get_pwd_cwd(void)
+{
+ static char cwd[PATH_MAX + 1];
+ char *pwd;
+ struct stat cwd_stat, pwd_stat;
+ if (getcwd(cwd, PATH_MAX) == NULL)
+ return NULL;
+ pwd = getenv("PWD");
+ if (pwd && strcmp(pwd, cwd)) {
+ stat(cwd, &cwd_stat);
+ if (!stat(pwd, &pwd_stat) &&
+ pwd_stat.st_dev == cwd_stat.st_dev &&
+ pwd_stat.st_ino == cwd_stat.st_ino) {
+ strlcpy(cwd, pwd, PATH_MAX);
+ }
+ }
+ return cwd;
+}
+
+static const char *make_nonrelative_path(const char *path)
+{
+ static char buf[PATH_MAX + 1];
+
+ if (is_absolute_path(path)) {
+ if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
+ die("Too long path: %.*s", 60, path);
+ } else {
+ const char *cwd = get_pwd_cwd();
+ if (!cwd)
+ die("Cannot determine the current working directory");
+ if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
+ die("Too long path: %.*s", 60, path);
+ }
+ return buf;
+}
+
+char *system_path(const char *path)
+{
+ char *buf = NULL;
+
+ if (is_absolute_path(path))
+ return strdup(path);
+
+ astrcatf(&buf, "%s/%s", subcmd_config.prefix, path);
+
+ return buf;
+}
+
+const char *extract_argv0_path(const char *argv0)
+{
+ const char *slash;
+
+ if (!argv0 || !*argv0)
+ return NULL;
+ slash = argv0 + strlen(argv0);
+
+ while (argv0 <= slash && !is_dir_sep(*slash))
+ slash--;
+
+ if (slash >= argv0) {
+ argv0_path = strndup(argv0, slash - argv0);
+ return argv0_path ? slash + 1 : NULL;
+ }
+
+ return argv0;
+}
+
+void set_argv_exec_path(const char *exec_path)
+{
+ argv_exec_path = exec_path;
+ /*
+ * Propagate this setting to external programs.
+ */
+ setenv(subcmd_config.exec_path_env, exec_path, 1);
+}
+
+
+/* Returns the highest-priority location to look for subprograms. */
+char *get_argv_exec_path(void)
+{
+ char *env;
+
+ if (argv_exec_path)
+ return strdup(argv_exec_path);
+
+ env = getenv(subcmd_config.exec_path_env);
+ if (env && *env)
+ return strdup(env);
+
+ return system_path(subcmd_config.exec_path);
+}
+
+static void add_path(char **out, const char *path)
+{
+ if (path && *path) {
+ if (is_absolute_path(path))
+ astrcat(out, path);
+ else
+ astrcat(out, make_nonrelative_path(path));
+
+ astrcat(out, ":");
+ }
+}
+
+void setup_path(void)
+{
+ const char *old_path = getenv("PATH");
+ char *new_path = NULL;
+ char *tmp = get_argv_exec_path();
+
+ add_path(&new_path, tmp);
+ add_path(&new_path, argv0_path);
+ free(tmp);
+
+ if (old_path)
+ astrcat(&new_path, old_path);
+ else
+ astrcat(&new_path, "/usr/local/bin:/usr/bin:/bin");
+
+ setenv("PATH", new_path, 1);
+
+ free(new_path);
+}
+
+static const char **prepare_exec_cmd(const char **argv)
+{
+ int argc;
+ const char **nargv;
+
+ for (argc = 0; argv[argc]; argc++)
+ ; /* just counting */
+ nargv = malloc(sizeof(*nargv) * (argc + 2));
+
+ nargv[0] = subcmd_config.exec_name;
+ for (argc = 0; argv[argc]; argc++)
+ nargv[argc + 1] = argv[argc];
+ nargv[argc + 1] = NULL;
+ return nargv;
+}
+
+int execv_cmd(const char **argv)
+{
+ const char **nargv = prepare_exec_cmd(argv);
+
+ /* execvp() can only ever return if it fails */
+ execvp(subcmd_config.exec_name, (char **)nargv);
+
+ free(nargv);
+ return -1;
+}
+
+
+int execl_cmd(const char *cmd,...)
+{
+ int argc;
+ const char *argv[MAX_ARGS + 1];
+ const char *arg;
+ va_list param;
+
+ va_start(param, cmd);
+ argv[0] = cmd;
+ argc = 1;
+ while (argc < MAX_ARGS) {
+ arg = argv[argc++] = va_arg(param, char *);
+ if (!arg)
+ break;
+ }
+ va_end(param);
+ if (MAX_ARGS <= argc) {
+ fprintf(stderr, " Error: too many args to run %s\n", cmd);
+ return -1;
+ }
+
+ argv[argc] = NULL;
+ return execv_cmd(argv);
+}
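The flow of the newly librarified exec-path handling, as a hedged sketch for a hypothetical 'mytool' front end ('mytool', the prefix, and the env var name are invented for illustration). execl_cmd() re-executes the registered exec_name with the sub-command prepended, resolved through the directory that setup_path() pushed onto $PATH:

	#include "exec-cmd.h"

	int main(int argc, const char **argv)
	{
		exec_cmd_init("mytool", "/usr/local", "libexec/mytool-core",
			      "MYTOOL_EXEC_PATH");
		extract_argv0_path(argv[0]);
		setup_path();

		if (argc > 1)
			return execl_cmd(argv[1], NULL);	/* e.g. "mytool help" */
		return 1;
	}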
diff --git a/tools/lib/subcmd/exec-cmd.h b/tools/lib/subcmd/exec-cmd.h
new file mode 100644
index 000000000000..5d08bda31d90
--- /dev/null
+++ b/tools/lib/subcmd/exec-cmd.h
@@ -0,0 +1,16 @@
+#ifndef __SUBCMD_EXEC_CMD_H
+#define __SUBCMD_EXEC_CMD_H
+
+extern void exec_cmd_init(const char *exec_name, const char *prefix,
+ const char *exec_path, const char *exec_path_env);
+
+extern void set_argv_exec_path(const char *exec_path);
+extern const char *extract_argv0_path(const char *path);
+extern void setup_path(void);
+extern int execv_cmd(const char **argv); /* NULL terminated */
+extern int execl_cmd(const char *cmd, ...);
+/* get_argv_exec_path and system_path return malloc'd string, caller must free it */
+extern char *get_argv_exec_path(void);
+extern char *system_path(const char *path);
+
+#endif /* __SUBCMD_EXEC_CMD_H */
diff --git a/tools/perf/util/help.c b/tools/lib/subcmd/help.c
index 86c37c472263..e228c3cb3716 100644
--- a/tools/perf/util/help.c
+++ b/tools/lib/subcmd/help.c
@@ -1,9 +1,15 @@
-#include "cache.h"
-#include "../builtin.h"
-#include "exec_cmd.h"
-#include "levenshtein.h"
-#include "help.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
#include <termios.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <dirent.h>
+#include "subcmd-util.h"
+#include "help.h"
+#include "exec-cmd.h"
void add_cmdname(struct cmdnames *cmds, const char *name, size_t len)
{
@@ -17,7 +23,7 @@ void add_cmdname(struct cmdnames *cmds, const char *name, size_t len)
cmds->names[cmds->cnt++] = ent;
}
-static void clean_cmdnames(struct cmdnames *cmds)
+void clean_cmdnames(struct cmdnames *cmds)
{
unsigned int i;
@@ -28,14 +34,14 @@ static void clean_cmdnames(struct cmdnames *cmds)
cmds->alloc = 0;
}
-static int cmdname_compare(const void *a_, const void *b_)
+int cmdname_compare(const void *a_, const void *b_)
{
struct cmdname *a = *(struct cmdname **)a_;
struct cmdname *b = *(struct cmdname **)b_;
return strcmp(a->name, b->name);
}
-static void uniq(struct cmdnames *cmds)
+void uniq(struct cmdnames *cmds)
{
unsigned int i, j;
@@ -71,6 +77,28 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
cmds->cnt = cj;
}
+static void get_term_dimensions(struct winsize *ws)
+{
+ char *s = getenv("LINES");
+
+ if (s != NULL) {
+ ws->ws_row = atoi(s);
+ s = getenv("COLUMNS");
+ if (s != NULL) {
+ ws->ws_col = atoi(s);
+ if (ws->ws_row && ws->ws_col)
+ return;
+ }
+ }
+#ifdef TIOCGWINSZ
+ if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
+ ws->ws_row && ws->ws_col)
+ return;
+#endif
+ ws->ws_row = 25;
+ ws->ws_col = 80;
+}
+
static void pretty_print_string_list(struct cmdnames *cmds, int longest)
{
int cols = 1, rows;
@@ -114,6 +142,14 @@ static int is_executable(const char *name)
return st.st_mode & S_IXUSR;
}
+static int has_extension(const char *filename, const char *ext)
+{
+ size_t len = strlen(filename);
+ size_t extlen = strlen(ext);
+
+ return len > extlen && !memcmp(filename + len - extlen, ext, extlen);
+}
+
static void list_commands_in_dir(struct cmdnames *cmds,
const char *path,
const char *prefix)
@@ -121,8 +157,7 @@ static void list_commands_in_dir(struct cmdnames *cmds,
int prefix_len;
DIR *dir = opendir(path);
struct dirent *de;
- struct strbuf buf = STRBUF_INIT;
- int len;
+ char *buf = NULL;
if (!dir)
return;
@@ -130,8 +165,7 @@ static void list_commands_in_dir(struct cmdnames *cmds,
prefix = "perf-";
prefix_len = strlen(prefix);
- strbuf_addf(&buf, "%s/", path);
- len = buf.len;
+ astrcatf(&buf, "%s/", path);
while ((de = readdir(dir)) != NULL) {
int entlen;
@@ -139,9 +173,8 @@ static void list_commands_in_dir(struct cmdnames *cmds,
if (prefixcmp(de->d_name, prefix))
continue;
- strbuf_setlen(&buf, len);
- strbuf_addstr(&buf, de->d_name);
- if (!is_executable(buf.buf))
+ astrcat(&buf, de->d_name);
+ if (!is_executable(buf))
continue;
entlen = strlen(de->d_name) - prefix_len;
@@ -151,7 +184,7 @@ static void list_commands_in_dir(struct cmdnames *cmds,
add_cmdname(cmds, de->d_name + prefix_len, entlen);
}
closedir(dir);
- strbuf_release(&buf);
+ free(buf);
}
void load_command_list(const char *prefix,
@@ -159,7 +192,7 @@ void load_command_list(const char *prefix,
struct cmdnames *other_cmds)
{
const char *env_path = getenv("PATH");
- const char *exec_path = perf_exec_path();
+ char *exec_path = get_argv_exec_path();
if (exec_path) {
list_commands_in_dir(main_cmds, exec_path, prefix);
@@ -172,7 +205,7 @@ void load_command_list(const char *prefix,
char *paths, *path, *colon;
path = paths = strdup(env_path);
while (1) {
- if ((colon = strchr(path, PATH_SEP)))
+ if ((colon = strchr(path, ':')))
*colon = 0;
if (!exec_path || strcmp(path, exec_path))
list_commands_in_dir(other_cmds, path, prefix);
@@ -187,6 +220,7 @@ void load_command_list(const char *prefix,
sizeof(*other_cmds->names), cmdname_compare);
uniq(other_cmds);
}
+ free(exec_path);
exclude_cmds(other_cmds, main_cmds);
}
@@ -203,13 +237,14 @@ void list_commands(const char *title, struct cmdnames *main_cmds,
longest = other_cmds->names[i]->len;
if (main_cmds->cnt) {
- const char *exec_path = perf_exec_path();
+ char *exec_path = get_argv_exec_path();
printf("available %s in '%s'\n", title, exec_path);
printf("----------------");
mput_char('-', strlen(title) + strlen(exec_path));
putchar('\n');
pretty_print_string_list(main_cmds, longest);
putchar('\n');
+ free(exec_path);
}
if (other_cmds->cnt) {
@@ -231,109 +266,3 @@ int is_in_cmdlist(struct cmdnames *c, const char *s)
return 1;
return 0;
}
-
-static int autocorrect;
-static struct cmdnames aliases;
-
-static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
-{
- if (!strcmp(var, "help.autocorrect"))
- autocorrect = perf_config_int(var,value);
- /* Also use aliases for command lookup */
- if (!prefixcmp(var, "alias."))
- add_cmdname(&aliases, var + 6, strlen(var + 6));
-
- return perf_default_config(var, value, cb);
-}
-
-static int levenshtein_compare(const void *p1, const void *p2)
-{
- const struct cmdname *const *c1 = p1, *const *c2 = p2;
- const char *s1 = (*c1)->name, *s2 = (*c2)->name;
- int l1 = (*c1)->len;
- int l2 = (*c2)->len;
- return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
-}
-
-static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
-{
- unsigned int i;
-
- ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc);
-
- for (i = 0; i < old->cnt; i++)
- cmds->names[cmds->cnt++] = old->names[i];
- zfree(&old->names);
- old->cnt = 0;
-}
-
-const char *help_unknown_cmd(const char *cmd)
-{
- unsigned int i, n = 0, best_similarity = 0;
- struct cmdnames main_cmds, other_cmds;
-
- memset(&main_cmds, 0, sizeof(main_cmds));
- memset(&other_cmds, 0, sizeof(main_cmds));
- memset(&aliases, 0, sizeof(aliases));
-
- perf_config(perf_unknown_cmd_config, NULL);
-
- load_command_list("perf-", &main_cmds, &other_cmds);
-
- add_cmd_list(&main_cmds, &aliases);
- add_cmd_list(&main_cmds, &other_cmds);
- qsort(main_cmds.names, main_cmds.cnt,
- sizeof(main_cmds.names), cmdname_compare);
- uniq(&main_cmds);
-
- if (main_cmds.cnt) {
- /* This reuses cmdname->len for similarity index */
- for (i = 0; i < main_cmds.cnt; ++i)
- main_cmds.names[i]->len =
- levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
-
- qsort(main_cmds.names, main_cmds.cnt,
- sizeof(*main_cmds.names), levenshtein_compare);
-
- best_similarity = main_cmds.names[0]->len;
- n = 1;
- while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
- ++n;
- }
-
- if (autocorrect && n == 1) {
- const char *assumed = main_cmds.names[0]->name;
-
- main_cmds.names[0] = NULL;
- clean_cmdnames(&main_cmds);
- fprintf(stderr, "WARNING: You called a perf program named '%s', "
- "which does not exist.\n"
- "Continuing under the assumption that you meant '%s'\n",
- cmd, assumed);
- if (autocorrect > 0) {
- fprintf(stderr, "in %0.1f seconds automatically...\n",
- (float)autocorrect/10.0);
- poll(NULL, 0, autocorrect * 100);
- }
- return assumed;
- }
-
- fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
-
- if (main_cmds.cnt && best_similarity < 6) {
- fprintf(stderr, "\nDid you mean %s?\n",
- n < 2 ? "this": "one of these");
-
- for (i = 0; i < n; i++)
- fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
- }
-
- exit(1);
-}
-
-int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused,
- const char *prefix __maybe_unused)
-{
- printf("perf version %s\n", perf_version_string);
- return 0;
-}
diff --git a/tools/perf/util/help.h b/tools/lib/subcmd/help.h
index 7f5c6dedd714..e145a020780c 100644
--- a/tools/perf/util/help.h
+++ b/tools/lib/subcmd/help.h
@@ -1,12 +1,14 @@
-#ifndef __PERF_HELP_H
-#define __PERF_HELP_H
+#ifndef __SUBCMD_HELP_H
+#define __SUBCMD_HELP_H
+
+#include <sys/types.h>
struct cmdnames {
size_t alloc;
size_t cnt;
struct cmdname {
size_t len; /* also used for similarity index in help.c */
- char name[FLEX_ARRAY];
+ char name[];
} **names;
};
@@ -20,10 +22,13 @@ void load_command_list(const char *prefix,
struct cmdnames *main_cmds,
struct cmdnames *other_cmds);
void add_cmdname(struct cmdnames *cmds, const char *name, size_t len);
+void clean_cmdnames(struct cmdnames *cmds);
+int cmdname_compare(const void *a, const void *b);
+void uniq(struct cmdnames *cmds);
/* Here we require that excludes is a sorted list. */
void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes);
int is_in_cmdlist(struct cmdnames *c, const char *s);
void list_commands(const char *title, struct cmdnames *main_cmds,
struct cmdnames *other_cmds);
-#endif /* __PERF_HELP_H */
+#endif /* __SUBCMD_HELP_H */
diff --git a/tools/perf/util/pager.c b/tools/lib/subcmd/pager.c
index 53ef006a951c..d50f3b58606b 100644
--- a/tools/perf/util/pager.c
+++ b/tools/lib/subcmd/pager.c
@@ -1,6 +1,12 @@
-#include "cache.h"
+#include <sys/select.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include "pager.h"
#include "run-command.h"
#include "sigchain.h"
+#include "subcmd-config.h"
/*
* This is split up from the rest of git so that we can do
@@ -9,6 +15,11 @@
static int spawned_pager;
+void pager_init(const char *pager_env)
+{
+ subcmd_config.pager_env = pager_env;
+}
+
static void pager_preexec(void)
{
/*
@@ -46,7 +57,7 @@ static void wait_for_pager_signal(int signo)
void setup_pager(void)
{
- const char *pager = getenv("PERF_PAGER");
+ const char *pager = getenv(subcmd_config.pager_env);
if (!isatty(1))
return;
@@ -85,11 +96,5 @@ void setup_pager(void)
int pager_in_use(void)
{
- const char *env;
-
- if (spawned_pager)
- return 1;
-
- env = getenv("PERF_PAGER_IN_USE");
- return env ? perf_config_bool("PERF_PAGER_IN_USE", env) : 0;
+ return spawned_pager;
}
diff --git a/tools/lib/subcmd/pager.h b/tools/lib/subcmd/pager.h
new file mode 100644
index 000000000000..8b83714ecf73
--- /dev/null
+++ b/tools/lib/subcmd/pager.h
@@ -0,0 +1,9 @@
+#ifndef __SUBCMD_PAGER_H
+#define __SUBCMD_PAGER_H
+
+extern void pager_init(const char *pager_env);
+
+extern void setup_pager(void);
+extern int pager_in_use(void);
+
+#endif /* __SUBCMD_PAGER_H */
diff --git a/tools/perf/util/parse-options.c b/tools/lib/subcmd/parse-options.c
index 9fca09296eb3..981bb4481fd5 100644
--- a/tools/perf/util/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -1,37 +1,66 @@
-#include "util.h"
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <ctype.h>
+#include "subcmd-util.h"
#include "parse-options.h"
-#include "cache.h"
-#include "header.h"
-#include <linux/string.h>
+#include "subcmd-config.h"
+#include "pager.h"
#define OPT_SHORT 1
#define OPT_UNSET 2
-static struct strbuf error_buf = STRBUF_INIT;
+char *error_buf;
static int opterror(const struct option *opt, const char *reason, int flags)
{
if (flags & OPT_SHORT)
- return error("switch `%c' %s", opt->short_name, reason);
- if (flags & OPT_UNSET)
- return error("option `no-%s' %s", opt->long_name, reason);
- return error("option `%s' %s", opt->long_name, reason);
+ fprintf(stderr, " Error: switch `%c' %s", opt->short_name, reason);
+ else if (flags & OPT_UNSET)
+ fprintf(stderr, " Error: option `no-%s' %s", opt->long_name, reason);
+ else
+ fprintf(stderr, " Error: option `%s' %s", opt->long_name, reason);
+
+ return -1;
+}
+
+static const char *skip_prefix(const char *str, const char *prefix)
+{
+ size_t len = strlen(prefix);
+ return strncmp(str, prefix, len) ? NULL : str + len;
+}
+
+static void optwarning(const struct option *opt, const char *reason, int flags)
+{
+ if (flags & OPT_SHORT)
+ fprintf(stderr, " Warning: switch `%c' %s", opt->short_name, reason);
+ else if (flags & OPT_UNSET)
+ fprintf(stderr, " Warning: option `no-%s' %s", opt->long_name, reason);
+ else
+ fprintf(stderr, " Warning: option `%s' %s", opt->long_name, reason);
}
static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt,
int flags, const char **arg)
{
+ const char *res;
+
if (p->opt) {
- *arg = p->opt;
+ res = p->opt;
p->opt = NULL;
} else if ((opt->flags & PARSE_OPT_LASTARG_DEFAULT) && (p->argc == 1 ||
**(p->argv + 1) == '-')) {
- *arg = (const char *)opt->defval;
+ res = (const char *)opt->defval;
} else if (p->argc > 1) {
p->argc--;
- *arg = *++p->argv;
+ res = *++p->argv;
} else
return opterror(opt, "requires a value", flags);
+ if (arg)
+ *arg = res;
return 0;
}
@@ -55,11 +84,11 @@ static int get_value(struct parse_opt_ctx_t *p,
if (((flags & OPT_SHORT) && p->excl_opt->short_name) ||
p->excl_opt->long_name == NULL) {
- scnprintf(msg, sizeof(msg), "cannot be used with switch `%c'",
- p->excl_opt->short_name);
+ snprintf(msg, sizeof(msg), "cannot be used with switch `%c'",
+ p->excl_opt->short_name);
} else {
- scnprintf(msg, sizeof(msg), "cannot be used with %s",
- p->excl_opt->long_name);
+ snprintf(msg, sizeof(msg), "cannot be used with %s",
+ p->excl_opt->long_name);
}
opterror(opt, msg, flags);
return -3;
@@ -91,6 +120,64 @@ static int get_value(struct parse_opt_ctx_t *p,
}
}
+ if (opt->flags & PARSE_OPT_NOBUILD) {
+ char reason[128];
+ bool noarg = false;
+
+ err = snprintf(reason, sizeof(reason),
+ opt->flags & PARSE_OPT_CANSKIP ?
+ "is being ignored because %s " :
+ "is not available because %s",
+ opt->build_opt);
+ reason[sizeof(reason) - 1] = '\0';
+
+ if (err < 0)
+ strncpy(reason, opt->flags & PARSE_OPT_CANSKIP ?
+ "is being ignored" :
+ "is not available",
+ sizeof(reason));
+
+ if (!(opt->flags & PARSE_OPT_CANSKIP))
+ return opterror(opt, reason, flags);
+
+ err = 0;
+ if (unset)
+ noarg = true;
+ if (opt->flags & PARSE_OPT_NOARG)
+ noarg = true;
+ if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+ noarg = true;
+
+ switch (opt->type) {
+ case OPTION_BOOLEAN:
+ case OPTION_INCR:
+ case OPTION_BIT:
+ case OPTION_SET_UINT:
+ case OPTION_SET_PTR:
+ case OPTION_END:
+ case OPTION_ARGUMENT:
+ case OPTION_GROUP:
+ noarg = true;
+ break;
+ case OPTION_CALLBACK:
+ case OPTION_STRING:
+ case OPTION_INTEGER:
+ case OPTION_UINTEGER:
+ case OPTION_LONG:
+ case OPTION_U64:
+ default:
+ break;
+ }
+
+ if (!noarg)
+ err = get_arg(p, opt, flags, NULL);
+ if (err)
+ return err;
+
+ optwarning(opt, reason, flags);
+ return 0;
+ }
+
switch (opt->type) {
case OPTION_BIT:
if (unset)
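For context, a hedged sketch of what a PARSE_OPT_NOBUILD option table entry might look like (field names follow the code above; the option itself and the build message are invented):

	static bool use_tui;

	static struct option opts[] = {
		{
			.type      = OPTION_BOOLEAN,
			.long_name = "tui",
			.value     = &use_tui,
			.help      = "use the TUI interface",
			/* CANSKIP: parse and warn instead of erroring out */
			.flags     = PARSE_OPT_NOBUILD | PARSE_OPT_CANSKIP,
			.build_opt = "the slang library was not found",
		},
		{ .type = OPTION_END },
	};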
@@ -327,14 +414,16 @@ match:
return get_value(p, options, flags);
}
- if (ambiguous_option)
- return error("Ambiguous option: %s "
- "(could be --%s%s or --%s%s)",
- arg,
- (ambiguous_flags & OPT_UNSET) ? "no-" : "",
- ambiguous_option->long_name,
- (abbrev_flags & OPT_UNSET) ? "no-" : "",
- abbrev_option->long_name);
+ if (ambiguous_option) {
+ fprintf(stderr,
+ " Error: Ambiguous option: %s (could be --%s%s or --%s%s)",
+ arg,
+ (ambiguous_flags & OPT_UNSET) ? "no-" : "",
+ ambiguous_option->long_name,
+ (abbrev_flags & OPT_UNSET) ? "no-" : "",
+ abbrev_option->long_name);
+ return -1;
+ }
if (abbrev_option)
return get_value(p, abbrev_option, abbrev_flags);
return -2;
@@ -346,7 +435,7 @@ static void check_typos(const char *arg, const struct option *options)
return;
if (!prefixcmp(arg, "no-")) {
- error ("did you mean `--%s` (with two dashes ?)", arg);
+ fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg);
exit(129);
}
@@ -354,14 +443,14 @@ static void check_typos(const char *arg, const struct option *options)
if (!options->long_name)
continue;
if (!prefixcmp(options->long_name, arg)) {
- error ("did you mean `--%s` (with two dashes ?)", arg);
+ fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg);
exit(129);
}
}
}
-void parse_options_start(struct parse_opt_ctx_t *ctx,
- int argc, const char **argv, int flags)
+static void parse_options_start(struct parse_opt_ctx_t *ctx,
+ int argc, const char **argv, int flags)
{
memset(ctx, 0, sizeof(*ctx));
ctx->argc = argc - 1;
@@ -378,9 +467,9 @@ static int usage_with_options_internal(const char * const *,
const struct option *, int,
struct parse_opt_ctx_t *);
-int parse_options_step(struct parse_opt_ctx_t *ctx,
- const struct option *options,
- const char * const usagestr[])
+static int parse_options_step(struct parse_opt_ctx_t *ctx,
+ const struct option *options,
+ const char * const usagestr[])
{
int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP);
int excl_short_opt = 1;
@@ -489,7 +578,7 @@ exclusive:
return PARSE_OPT_HELP;
}
-int parse_options_end(struct parse_opt_ctx_t *ctx)
+static int parse_options_end(struct parse_opt_ctx_t *ctx)
{
memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out));
ctx->out[ctx->cpidx + ctx->argc] = NULL;
@@ -501,22 +590,20 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o
{
struct parse_opt_ctx_t ctx;
- perf_env__set_cmdline(&perf_env, argc, argv);
-
/* build usage string if it's not provided */
if (subcommands && !usagestr[0]) {
- struct strbuf buf = STRBUF_INIT;
+ char *buf = NULL;
+
+ astrcatf(&buf, "%s %s [<options>] {", subcmd_config.exec_name, argv[0]);
- strbuf_addf(&buf, "perf %s [<options>] {", argv[0]);
for (int i = 0; subcommands[i]; i++) {
if (i)
- strbuf_addstr(&buf, "|");
- strbuf_addstr(&buf, subcommands[i]);
+ astrcat(&buf, "|");
+ astrcat(&buf, subcommands[i]);
}
- strbuf_addstr(&buf, "}");
+ astrcat(&buf, "}");
- usagestr[0] = strdup(buf.buf);
- strbuf_release(&buf);
+ usagestr[0] = buf;
}
parse_options_start(&ctx, argc, argv, flags);
@@ -541,13 +628,11 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o
putchar('\n');
exit(130);
default: /* PARSE_OPT_UNKNOWN */
- if (ctx.argv[0][1] == '-') {
- strbuf_addf(&error_buf, "unknown option `%s'",
- ctx.argv[0] + 2);
- } else {
- strbuf_addf(&error_buf, "unknown switch `%c'",
- *ctx.opt);
- }
+ if (ctx.argv[0][1] == '-')
+ astrcatf(&error_buf, "unknown option `%s'",
+ ctx.argv[0] + 2);
+ else
+ astrcatf(&error_buf, "unknown switch `%c'", *ctx.opt);
usage_with_options(usagestr, options);
}
@@ -647,6 +732,10 @@ static void print_option_help(const struct option *opts, int full)
pad = USAGE_OPTS_WIDTH;
}
fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help);
+ if (opts->flags & PARSE_OPT_NOBUILD)
+ fprintf(stderr, "%*s(not built-in because %s)\n",
+ USAGE_OPTS_WIDTH + USAGE_GAP, "",
+ opts->build_opt);
}
static int option__cmp(const void *va, const void *vb)
@@ -672,16 +761,18 @@ static int option__cmp(const void *va, const void *vb)
static struct option *options__order(const struct option *opts)
{
- int nr_opts = 0;
+ int nr_opts = 0, len;
const struct option *o = opts;
struct option *ordered;
for (o = opts; o->type != OPTION_END; o++)
++nr_opts;
- ordered = memdup(opts, sizeof(*o) * (nr_opts + 1));
- if (ordered == NULL)
+ len = sizeof(*o) * (nr_opts + 1);
+ ordered = malloc(len);
+ if (!ordered)
goto out;
+ memcpy(ordered, opts, len);
qsort(ordered, nr_opts, sizeof(*o), option__cmp);
out:
@@ -719,9 +810,9 @@ static bool option__in_argv(const struct option *opt, const struct parse_opt_ctx
return false;
}
-int usage_with_options_internal(const char * const *usagestr,
- const struct option *opts, int full,
- struct parse_opt_ctx_t *ctx)
+static int usage_with_options_internal(const char * const *usagestr,
+ const struct option *opts, int full,
+ struct parse_opt_ctx_t *ctx)
{
struct option *ordered;
@@ -730,9 +821,9 @@ int usage_with_options_internal(const char * const *usagestr,
setup_pager();
- if (strbuf_avail(&error_buf)) {
- fprintf(stderr, " Error: %s\n", error_buf.buf);
- strbuf_release(&error_buf);
+ if (error_buf) {
+ fprintf(stderr, " Error: %s\n", error_buf);
+ zfree(&error_buf);
}
fprintf(stderr, "\n Usage: %s\n", *usagestr++);
@@ -768,7 +859,6 @@ int usage_with_options_internal(const char * const *usagestr,
void usage_with_options(const char * const *usagestr,
const struct option *opts)
{
- exit_browser(false);
usage_with_options_internal(usagestr, opts, 0, NULL);
exit(129);
}
@@ -777,13 +867,15 @@ void usage_with_options_msg(const char * const *usagestr,
const struct option *opts, const char *fmt, ...)
{
va_list ap;
-
- exit_browser(false);
+ char *tmp = error_buf;
va_start(ap, fmt);
- strbuf_addv(&error_buf, fmt, ap);
+ if (vasprintf(&error_buf, fmt, ap) == -1)
+ die("vasprintf failed");
va_end(ap);
+ free(tmp);
+
usage_with_options_internal(usagestr, opts, 0, NULL);
exit(129);
}
@@ -853,15 +945,39 @@ int parse_opt_verbosity_cb(const struct option *opt,
return 0;
}
-void set_option_flag(struct option *opts, int shortopt, const char *longopt,
- int flag)
+static struct option *
+find_option(struct option *opts, int shortopt, const char *longopt)
{
for (; opts->type != OPTION_END; opts++) {
if ((shortopt && opts->short_name == shortopt) ||
(opts->long_name && longopt &&
- !strcmp(opts->long_name, longopt))) {
- opts->flags |= flag;
- break;
- }
+ !strcmp(opts->long_name, longopt)))
+ return opts;
}
+ return NULL;
+}
+
+void set_option_flag(struct option *opts, int shortopt, const char *longopt,
+ int flag)
+{
+ struct option *opt = find_option(opts, shortopt, longopt);
+
+ if (opt)
+ opt->flags |= flag;
+}
+
+void set_option_nobuild(struct option *opts, int shortopt,
+ const char *longopt,
+ const char *build_opt,
+ bool can_skip)
+{
+ struct option *opt = find_option(opts, shortopt, longopt);
+
+ if (!opt)
+ return;
+
+ opt->flags |= PARSE_OPT_NOBUILD;
+ opt->flags |= can_skip ? PARSE_OPT_CANSKIP : 0;
+ opt->build_opt = build_opt;
}
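
The PARSE_OPT_NOBUILD machinery ties together here: a compiled-out option stays in the table, --help appends the "(not built-in because ...)" note emitted by print_option_help() above, and with can_skip the parser warns and skips the option (consuming its argument) instead of failing. A minimal usage sketch; the option table and build flag below are illustrative, not taken from this patch:

	/* hypothetical: --clang-path is only available with BPF support */
	set_option_nobuild(record_options, 0, "clang-path",
			   "NO_LIBBPF=1", /* can_skip */ true);
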
diff --git a/tools/perf/util/parse-options.h b/tools/lib/subcmd/parse-options.h
index a8e407bc251e..13a2cc1d6140 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -1,8 +1,8 @@
-#ifndef __PERF_PARSE_OPTIONS_H
-#define __PERF_PARSE_OPTIONS_H
+#ifndef __SUBCMD_PARSE_OPTIONS_H
+#define __SUBCMD_PARSE_OPTIONS_H
-#include <linux/kernel.h>
#include <stdbool.h>
+#include <stdint.h>
enum parse_opt_type {
/* special types */
@@ -41,6 +41,8 @@ enum parse_opt_option_flags {
PARSE_OPT_DISABLED = 32,
PARSE_OPT_EXCLUSIVE = 64,
PARSE_OPT_NOEMPTY = 128,
+ PARSE_OPT_NOBUILD = 256,
+ PARSE_OPT_CANSKIP = 512,
};
struct option;
@@ -96,6 +98,7 @@ struct option {
void *value;
const char *argh;
const char *help;
+ const char *build_opt;
int flags;
parse_opt_cb *callback;
@@ -149,6 +152,9 @@ struct option {
/* parse_options() will filter out the processed options and leave the
 * non-option arguments in argv[].
* Returns the number of arguments left in argv[].
+ *
+ * NOTE: parse_options() and parse_options_subcommand() may call exit() in the
+ * case of an error (or for 'special' options like --list-cmds or --list-opts).
*/
extern int parse_options(int argc, const char **argv,
const struct option *options,
@@ -195,15 +201,6 @@ extern int parse_options_usage(const char * const *usagestr,
const char *optstr,
bool short_opt);
-extern void parse_options_start(struct parse_opt_ctx_t *ctx,
- int argc, const char **argv, int flags);
-
-extern int parse_options_step(struct parse_opt_ctx_t *ctx,
- const struct option *options,
- const char * const usagestr[]);
-
-extern int parse_options_end(struct parse_opt_ctx_t *ctx);
-
/*----- some often used options -----*/
extern int parse_opt_abbrev_cb(const struct option *, const char *, int);
@@ -226,4 +223,7 @@ extern int parse_opt_verbosity_cb(const struct option *, const char *, int);
extern const char *parse_options_fix_filename(const char *prefix, const char *file);
void set_option_flag(struct option *opts, int sopt, const char *lopt, int flag);
-#endif /* __PERF_PARSE_OPTIONS_H */
+void set_option_nobuild(struct option *opts, int shortopt, const char *longopt,
+ const char *build_opt, bool can_skip);
+
+#endif /* __SUBCMD_PARSE_OPTIONS_H */
diff --git a/tools/perf/util/run-command.c b/tools/lib/subcmd/run-command.c
index 34622b53e733..f4f6c9eb8e59 100644
--- a/tools/perf/util/run-command.c
+++ b/tools/lib/subcmd/run-command.c
@@ -1,7 +1,15 @@
-#include "cache.h"
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/wait.h>
+#include "subcmd-util.h"
#include "run-command.h"
-#include "exec_cmd.h"
-#include "debug.h"
+#include "exec-cmd.h"
+
+#define STRERR_BUFSIZE 128
static inline void close_pair(int fd[2])
{
@@ -112,8 +120,8 @@ int start_command(struct child_process *cmd)
}
if (cmd->preexec_cb)
cmd->preexec_cb();
- if (cmd->perf_cmd) {
- execv_perf_cmd(cmd->argv);
+ if (cmd->exec_cmd) {
+ execv_cmd(cmd->argv);
} else {
execvp(cmd->argv[0], (char *const*) cmd->argv);
}
@@ -164,8 +172,8 @@ static int wait_or_whine(pid_t pid)
if (waiting < 0) {
if (errno == EINTR)
continue;
- error("waitpid failed (%s)",
- strerror_r(errno, sbuf, sizeof(sbuf)));
+ fprintf(stderr, " Error: waitpid failed (%s)\n",
+ strerror_r(errno, sbuf, sizeof(sbuf)));
return -ERR_RUN_COMMAND_WAITPID;
}
if (waiting != pid)
@@ -207,7 +215,7 @@ static void prepare_run_command_v_opt(struct child_process *cmd,
memset(cmd, 0, sizeof(*cmd));
cmd->argv = argv;
cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0;
- cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0;
+ cmd->exec_cmd = opt & RUN_EXEC_CMD ? 1 : 0;
cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0;
}
diff --git a/tools/perf/util/run-command.h b/tools/lib/subcmd/run-command.h
index 1ef264d5069c..fe2befea1e73 100644
--- a/tools/perf/util/run-command.h
+++ b/tools/lib/subcmd/run-command.h
@@ -1,5 +1,7 @@
-#ifndef __PERF_RUN_COMMAND_H
-#define __PERF_RUN_COMMAND_H
+#ifndef __SUBCMD_RUN_COMMAND_H
+#define __SUBCMD_RUN_COMMAND_H
+
+#include <unistd.h>
enum {
ERR_RUN_COMMAND_FORK = 10000,
@@ -41,7 +43,7 @@ struct child_process {
unsigned no_stdin:1;
unsigned no_stdout:1;
unsigned no_stderr:1;
- unsigned perf_cmd:1; /* if this is to be perf sub-command */
+ unsigned exec_cmd:1; /* if this is to be an external sub-command */
unsigned stdout_to_stderr:1;
void (*preexec_cb)(void);
};
@@ -51,8 +53,8 @@ int finish_command(struct child_process *);
int run_command(struct child_process *);
#define RUN_COMMAND_NO_STDIN 1
-#define RUN_PERF_CMD 2 /*If this is to be perf sub-command */
+#define RUN_EXEC_CMD 2 /* If this is to be an external sub-command */
#define RUN_COMMAND_STDOUT_TO_STDERR 4
int run_command_v_opt(const char **argv, int opt);
-#endif /* __PERF_RUN_COMMAND_H */
+#endif /* __SUBCMD_RUN_COMMAND_H */
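
After the rename, the one-shot helper remains usable by any tool linking the library. A minimal sketch, assuming the exec path has already been configured so that execv_cmd() can resolve the sub-command:

	#include <subcmd/run-command.h>

	static int run_help_subcommand(void)
	{
		const char *argv[] = { "help", NULL };

		/* RUN_EXEC_CMD: resolve "help" via the exec path, not $PATH */
		return run_command_v_opt(argv, RUN_EXEC_CMD);
	}
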
diff --git a/tools/perf/util/sigchain.c b/tools/lib/subcmd/sigchain.c
index ba785e9b1841..3537c348a18e 100644
--- a/tools/perf/util/sigchain.c
+++ b/tools/lib/subcmd/sigchain.c
@@ -1,5 +1,6 @@
+#include <signal.h>
+#include "subcmd-util.h"
#include "sigchain.h"
-#include "cache.h"
#define SIGCHAIN_MAX_SIGNALS 32
diff --git a/tools/perf/util/sigchain.h b/tools/lib/subcmd/sigchain.h
index 959d64eb5557..0c919f2874ca 100644
--- a/tools/perf/util/sigchain.h
+++ b/tools/lib/subcmd/sigchain.h
@@ -1,5 +1,5 @@
-#ifndef __PERF_SIGCHAIN_H
-#define __PERF_SIGCHAIN_H
+#ifndef __SUBCMD_SIGCHAIN_H
+#define __SUBCMD_SIGCHAIN_H
typedef void (*sigchain_fun)(int);
@@ -7,4 +7,4 @@ int sigchain_pop(int sig);
void sigchain_push_common(sigchain_fun f);
-#endif /* __PERF_SIGCHAIN_H */
+#endif /* __SUBCMD_SIGCHAIN_H */
diff --git a/tools/lib/subcmd/subcmd-config.c b/tools/lib/subcmd/subcmd-config.c
new file mode 100644
index 000000000000..d017c728bd1b
--- /dev/null
+++ b/tools/lib/subcmd/subcmd-config.c
@@ -0,0 +1,11 @@
+#include "subcmd-config.h"
+
+#define UNDEFINED "SUBCMD_HAS_NOT_BEEN_INITIALIZED"
+
+struct subcmd_config subcmd_config = {
+ .exec_name = UNDEFINED,
+ .prefix = UNDEFINED,
+ .exec_path = UNDEFINED,
+ .exec_path_env = UNDEFINED,
+ .pager_env = UNDEFINED,
+};
diff --git a/tools/lib/subcmd/subcmd-config.h b/tools/lib/subcmd/subcmd-config.h
new file mode 100644
index 000000000000..cc8514030b5c
--- /dev/null
+++ b/tools/lib/subcmd/subcmd-config.h
@@ -0,0 +1,14 @@
+#ifndef __PERF_SUBCMD_CONFIG_H
+#define __PERF_SUBCMD_CONFIG_H
+
+struct subcmd_config {
+ const char *exec_name;
+ const char *prefix;
+ const char *exec_path;
+ const char *exec_path_env;
+ const char *pager_env;
+};
+
+extern struct subcmd_config subcmd_config;
+
+#endif /* __PERF_SUBCMD_CONFIG_H */
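
The library hard-codes nothing about its host tool; every field keeps the UNDEFINED placeholder until the client overwrites it at startup. A hypothetical initialization; only the field names come from the header above, the values are illustrative:

	#include "subcmd-config.h"

	static void tool_subcmd_config_init(void)
	{
		subcmd_config.exec_name     = "perf";
		subcmd_config.prefix        = "/usr";                   /* assumption */
		subcmd_config.exec_path     = "/usr/libexec/perf-core"; /* assumption */
		subcmd_config.exec_path_env = "PERF_EXEC_PATH";
		subcmd_config.pager_env     = "PERF_PAGER";
	}
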
diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h
new file mode 100644
index 000000000000..fc2e45d8aaf1
--- /dev/null
+++ b/tools/lib/subcmd/subcmd-util.h
@@ -0,0 +1,91 @@
+#ifndef __SUBCMD_UTIL_H
+#define __SUBCMD_UTIL_H
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define NORETURN __attribute__((__noreturn__))
+
+static inline void report(const char *prefix, const char *err, va_list params)
+{
+ char msg[1024];
+ vsnprintf(msg, sizeof(msg), err, params);
+ fprintf(stderr, " %s%s\n", prefix, msg);
+}
+
+static NORETURN inline void die(const char *err, ...)
+{
+ va_list params;
+
+ va_start(params, err);
+ report(" Fatal: ", err, params);
+ va_end(params);
+ exit(128);
+}
+
+#define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
+
+#define alloc_nr(x) (((x)+16)*3/2)
+
+/*
+ * Realloc the buffer pointed at by variable 'x' so that it can hold
+ * at least 'nr' entries; the number of entries currently allocated
+ * is 'alloc', using the standard growing factor alloc_nr() macro.
+ *
+ * DO NOT USE any expression with side-effect for 'x' or 'alloc'.
+ */
+#define ALLOC_GROW(x, nr, alloc) \
+ do { \
+ if ((nr) > alloc) { \
+ if (alloc_nr(alloc) < (nr)) \
+ alloc = (nr); \
+ else \
+ alloc = alloc_nr(alloc); \
+ x = xrealloc((x), alloc * sizeof(*(x))); \
+ } \
+ } while(0)
+
+static inline void *xrealloc(void *ptr, size_t size)
+{
+ void *ret = realloc(ptr, size);
+ if (!ret && !size)
+ ret = realloc(ptr, 1);
+ if (!ret)
+ die("Out of memory, realloc failed");
+ return ret;
+}
+
+#define astrcatf(out, fmt, ...) \
+({ \
+ char *tmp = *(out); \
+ if (asprintf((out), "%s" fmt, tmp ?: "", ## __VA_ARGS__) == -1) \
+ die("asprintf failed"); \
+ free(tmp); \
+})
+
+static inline void astrcat(char **out, const char *add)
+{
+ char *tmp = *out;
+
+ if (asprintf(out, "%s%s", tmp ?: "", add) == -1)
+ die("asprintf failed");
+
+ free(tmp);
+}
+
+static inline int prefixcmp(const char *str, const char *prefix)
+{
+ for (; ; str++, prefix++)
+ if (!*prefix)
+ return 0;
+ else if (*str != *prefix)
+ return (unsigned char)*prefix - (unsigned char)*str;
+}
+
+#endif /* __SUBCMD_UTIL_H */
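
astrcat() and astrcatf() stand in for perf's strbuf inside this library: both append by reallocating, and both accept a NULL starting pointer. A short sketch of the intended pattern, mirroring the usage-string construction in parse_options_subcommand() earlier in this patch:

	char *usage = NULL;

	astrcatf(&usage, "%s %s [<options>] {", "perf", "stat");
	astrcat(&usage, "record");
	astrcat(&usage, "|");
	astrcat(&usage, "report");
	astrcat(&usage, "}");
	/* usage now holds "perf stat [<options>] {record|report}" */
	free(usage);

ALLOC_GROW() behaves like the identically named macro in git and perf: pass the array variable, the required element count, and a variable tracking the current allocation, and it reallocates with the alloc_nr() growth factor only when needed.
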
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 2a912df6771b..ea69ce35e902 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -4735,73 +4735,80 @@ static int is_printable_array(char *p, unsigned int len)
return 1;
}
-static void print_event_fields(struct trace_seq *s, void *data,
- int size __maybe_unused,
- struct event_format *event)
+void pevent_print_field(struct trace_seq *s, void *data,
+ struct format_field *field)
{
- struct format_field *field;
unsigned long long val;
unsigned int offset, len, i;
-
- field = event->format.fields;
- while (field) {
- trace_seq_printf(s, " %s=", field->name);
- if (field->flags & FIELD_IS_ARRAY) {
- offset = field->offset;
- len = field->size;
- if (field->flags & FIELD_IS_DYNAMIC) {
- val = pevent_read_number(event->pevent, data + offset, len);
- offset = val;
- len = offset >> 16;
- offset &= 0xffff;
- }
- if (field->flags & FIELD_IS_STRING &&
- is_printable_array(data + offset, len)) {
- trace_seq_printf(s, "%s", (char *)data + offset);
- } else {
- trace_seq_puts(s, "ARRAY[");
- for (i = 0; i < len; i++) {
- if (i)
- trace_seq_puts(s, ", ");
- trace_seq_printf(s, "%02x",
- *((unsigned char *)data + offset + i));
- }
- trace_seq_putc(s, ']');
- field->flags &= ~FIELD_IS_STRING;
- }
+ struct pevent *pevent = field->event->pevent;
+
+ if (field->flags & FIELD_IS_ARRAY) {
+ offset = field->offset;
+ len = field->size;
+ if (field->flags & FIELD_IS_DYNAMIC) {
+ val = pevent_read_number(pevent, data + offset, len);
+ offset = val;
+ len = offset >> 16;
+ offset &= 0xffff;
+ }
+ if (field->flags & FIELD_IS_STRING &&
+ is_printable_array(data + offset, len)) {
+ trace_seq_printf(s, "%s", (char *)data + offset);
} else {
- val = pevent_read_number(event->pevent, data + field->offset,
- field->size);
- if (field->flags & FIELD_IS_POINTER) {
- trace_seq_printf(s, "0x%llx", val);
- } else if (field->flags & FIELD_IS_SIGNED) {
- switch (field->size) {
- case 4:
- /*
- * If field is long then print it in hex.
- * A long usually stores pointers.
- */
- if (field->flags & FIELD_IS_LONG)
- trace_seq_printf(s, "0x%x", (int)val);
- else
- trace_seq_printf(s, "%d", (int)val);
- break;
- case 2:
- trace_seq_printf(s, "%2d", (short)val);
- break;
- case 1:
- trace_seq_printf(s, "%1d", (char)val);
- break;
- default:
- trace_seq_printf(s, "%lld", val);
- }
- } else {
+ trace_seq_puts(s, "ARRAY[");
+ for (i = 0; i < len; i++) {
+ if (i)
+ trace_seq_puts(s, ", ");
+ trace_seq_printf(s, "%02x",
+ *((unsigned char *)data + offset + i));
+ }
+ trace_seq_putc(s, ']');
+ field->flags &= ~FIELD_IS_STRING;
+ }
+ } else {
+ val = pevent_read_number(pevent, data + field->offset,
+ field->size);
+ if (field->flags & FIELD_IS_POINTER) {
+ trace_seq_printf(s, "0x%llx", val);
+ } else if (field->flags & FIELD_IS_SIGNED) {
+ switch (field->size) {
+ case 4:
+ /*
+ * If field is long then print it in hex.
+ * A long usually stores pointers.
+ */
if (field->flags & FIELD_IS_LONG)
- trace_seq_printf(s, "0x%llx", val);
+ trace_seq_printf(s, "0x%x", (int)val);
else
- trace_seq_printf(s, "%llu", val);
+ trace_seq_printf(s, "%d", (int)val);
+ break;
+ case 2:
+ trace_seq_printf(s, "%2d", (short)val);
+ break;
+ case 1:
+ trace_seq_printf(s, "%1d", (char)val);
+ break;
+ default:
+ trace_seq_printf(s, "%lld", val);
}
+ } else {
+ if (field->flags & FIELD_IS_LONG)
+ trace_seq_printf(s, "0x%llx", val);
+ else
+ trace_seq_printf(s, "%llu", val);
}
+ }
+}
+
+void pevent_print_fields(struct trace_seq *s, void *data,
+ int size __maybe_unused, struct event_format *event)
+{
+ struct format_field *field;
+
+ field = event->format.fields;
+ while (field) {
+ trace_seq_printf(s, " %s=", field->name);
+ pevent_print_field(s, data, field);
field = field->next;
}
}
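
With pevent_print_field() exported, a caller can format one field without pretty-printing the whole event. A minimal sketch, assuming a parsed record and a struct format_field pointer are already in hand:

	struct trace_seq s;

	trace_seq_init(&s);
	pevent_print_field(&s, record->data, field);
	trace_seq_do_printf(&s);
	trace_seq_destroy(&s);
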
@@ -4827,7 +4834,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
if (event->flags & EVENT_FL_FAILED) {
trace_seq_printf(s, "[FAILED TO PARSE]");
- print_event_fields(s, data, size, event);
+ pevent_print_fields(s, data, size, event);
return;
}
@@ -4968,13 +4975,12 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
sizeof(long) != 8) {
char *p;
- ls = 2;
/* make %l into %ll */
- p = strchr(format, 'l');
- if (p)
+ if (ls == 1 && (p = strchr(format, 'l')))
memmove(p+1, p, strlen(p)+1);
else if (strcmp(format, "%p") == 0)
strcpy(format, "0x%llx");
+ ls = 2;
}
switch (ls) {
case -2:
@@ -5302,7 +5308,7 @@ void pevent_event_info(struct trace_seq *s, struct event_format *event,
int print_pretty = 1;
if (event->pevent->print_raw || (event->flags & EVENT_FL_PRINTRAW))
- print_event_fields(s, record->data, record->size, event);
+ pevent_print_fields(s, record->data, record->size, event);
else {
if (event->handler && !(event->flags & EVENT_FL_NOHANDLE))
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 6fc83c7edbe9..706d9bc24066 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -705,6 +705,10 @@ struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *com
struct cmdline *next);
int pevent_cmdline_pid(struct pevent *pevent, struct cmdline *cmdline);
+void pevent_print_field(struct trace_seq *s, void *data,
+ struct format_field *field);
+void pevent_print_fields(struct trace_seq *s, void *data,
+ int size __maybe_unused, struct event_format *event);
void pevent_event_info(struct trace_seq *s, struct event_format *event,
struct pevent_record *record);
int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum,
diff --git a/tools/lib/util/find_next_bit.c b/tools/lib/util/find_next_bit.c
deleted file mode 100644
index 41b44f65a79e..000000000000
--- a/tools/lib/util/find_next_bit.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/* find_next_bit.c: fallback find next bit implementation
- *
- * Copied from lib/find_next_bit.c to tools/lib/next_bit.c
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/bitops.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
-
-#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
-
-#ifndef find_next_bit
-/*
- * Find the next set bit in a memory region.
- */
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
- unsigned long offset)
-{
- const unsigned long *p = addr + BITOP_WORD(offset);
- unsigned long result = offset & ~(BITS_PER_LONG-1);
- unsigned long tmp;
-
- if (offset >= size)
- return size;
- size -= result;
- offset %= BITS_PER_LONG;
- if (offset) {
- tmp = *(p++);
- tmp &= (~0UL << offset);
- if (size < BITS_PER_LONG)
- goto found_first;
- if (tmp)
- goto found_middle;
- size -= BITS_PER_LONG;
- result += BITS_PER_LONG;
- }
- while (size & ~(BITS_PER_LONG-1)) {
- if ((tmp = *(p++)))
- goto found_middle;
- result += BITS_PER_LONG;
- size -= BITS_PER_LONG;
- }
- if (!size)
- return result;
- tmp = *p;
-
-found_first:
- tmp &= (~0UL >> (BITS_PER_LONG - size));
- if (tmp == 0UL) /* Are any bits set? */
- return result + size; /* Nope. */
-found_middle:
- return result + __ffs(tmp);
-}
-#endif
-
-#ifndef find_first_bit
-/*
- * Find the first set bit in a memory region.
- */
-unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
-{
- const unsigned long *p = addr;
- unsigned long result = 0;
- unsigned long tmp;
-
- while (size & ~(BITS_PER_LONG-1)) {
- if ((tmp = *(p++)))
- goto found;
- result += BITS_PER_LONG;
- size -= BITS_PER_LONG;
- }
- if (!size)
- return result;
-
- tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
- if (tmp == 0UL) /* Are any bits set? */
- return result + size; /* Nope. */
-found:
- return result + __ffs(tmp);
-}
-#endif
diff --git a/tools/net/Makefile b/tools/net/Makefile
index ee577ea03ba5..ddf888010652 100644
--- a/tools/net/Makefile
+++ b/tools/net/Makefile
@@ -4,6 +4,9 @@ CC = gcc
LEX = flex
YACC = bison
+CFLAGS += -Wall -O2
+CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+
%.yacc.c: %.y
$(YACC) -o $@ -d $<
@@ -12,15 +15,13 @@ YACC = bison
all : bpf_jit_disasm bpf_dbg bpf_asm
-bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm'
+bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm'
bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
bpf_jit_disasm : bpf_jit_disasm.o
-bpf_dbg : CFLAGS = -Wall -O2
bpf_dbg : LDLIBS = -lreadline
bpf_dbg : bpf_dbg.o
-bpf_asm : CFLAGS = -Wall -O2 -I.
bpf_asm : LDLIBS =
bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
bpf_exp.lex.o : bpf_exp.yacc.c
diff --git a/tools/perf/Build b/tools/perf/Build
index 72237455b400..6b67e6f4179f 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -1,5 +1,6 @@
perf-y += builtin-bench.o
perf-y += builtin-annotate.o
+perf-y += builtin-config.o
perf-y += builtin-diff.o
perf-y += builtin-evlist.o
perf-y += builtin-help.o
@@ -19,6 +20,7 @@ perf-y += builtin-kvm.o
perf-y += builtin-inject.o
perf-y += builtin-mem.o
perf-y += builtin-data.o
+perf-y += builtin-version.o
perf-$(CONFIG_AUDIT) += builtin-trace.o
perf-$(CONFIG_LIBELF) += builtin-probe.o
@@ -34,8 +36,12 @@ paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))"
CFLAGS_builtin-help.o += $(paths)
CFLAGS_builtin-timechart.o += $(paths)
-CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE
+CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" \
+ -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" \
+ -DPREFIX="BUILD_STR($(prefix_SQ))" \
+ -include $(OUTPUT)PERF-VERSION-FILE
CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_SQ))"
+CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))"
libperf-y += util/
libperf-y += arch/
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
new file mode 100644
index 000000000000..b9ca1e304158
--- /dev/null
+++ b/tools/perf/Documentation/perf-config.txt
@@ -0,0 +1,103 @@
+perf-config(1)
+==============
+
+NAME
+----
+perf-config - Get and set variables in a configuration file.
+
+SYNOPSIS
+--------
+[verse]
+'perf config' -l | --list
+
+DESCRIPTION
+-----------
+You can manage variables in a configuration file with this command.
+
+OPTIONS
+-------
+
+-l::
+--list::
+ Show current config variables, name and value, for all sections.
+
+CONFIGURATION FILE
+------------------
+
+The perf configuration file contains many variables to change various
+aspects of each of its tools, including output, disk usage, etc.
+The '$HOME/.perfconfig' file is used to store a per-user configuration.
+The file '$(sysconfdir)/perfconfig' can be used to
+store a system-wide default configuration.
+
+Syntax
+~~~~~~
+
+The file consists of sections. A section starts with its name
+surrounded by square brackets and continues until the next section
+begins. Each variable must be in a section and have the form
+'name = value', for example:
+
+ [section]
+ name1 = value1
+ name2 = value2
+
+Section names are case sensitive and can contain any characters except
+newline (double quote `"` and backslash have to be escaped as `\"` and `\\`,
+respectively). Section headers can't span multiple lines.
+
+Example
+~~~~~~~
+
+Given a $HOME/.perfconfig like this:
+
+#
+# This is the config file, and
+# a '#' or ';' character indicates a comment
+#
+
+ [colors]
+ # Color variables
+ top = red, default
+ medium = green, default
+ normal = lightgray, default
+ selected = white, lightgray
+ code = blue, default
+ addr = magenta, default
+ root = white, blue
+
+ [tui]
+ # Defaults if linked with libslang
+ report = on
+ annotate = on
+ top = on
+
+ [buildid]
+ # Default, disable using /dev/null
+ dir = ~/.debug
+
+ [annotate]
+ # Defaults
+ hide_src_code = false
+ use_offset = true
+ jump_arrows = true
+ show_nr_jumps = false
+
+ [help]
+ # Format can be man, info, web or html
+ format = man
+ autocorrect = 0
+
+ [ui]
+ show-headers = true
+
+ [call-graph]
+ # fp (framepointer), dwarf
+ record-mode = fp
+ print-type = graph
+ order = caller
+ sort-key = function
+
+SEE ALSO
+--------
+linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt
index 1ceb3700ffbb..6f7200fb85cf 100644
--- a/tools/perf/Documentation/perf-evlist.txt
+++ b/tools/perf/Documentation/perf-evlist.txt
@@ -32,6 +32,9 @@ OPTIONS
--group::
Show event group information.
+--trace-fields::
+ Show tracepoint field names.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-list[1],
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index e630a7d2c348..3a1a32f5479f 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -207,11 +207,23 @@ comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-
In per-thread mode with inheritance mode on (default), samples are captured only when
the thread executes on the designated CPUs. Default is to monitor all CPUs.
+-B::
+--no-buildid::
+Do not save the build ids of binaries in the perf.data files. This skips
+post processing after recording, which sometimes makes the final step in
+the recording process take a long time, as it needs to process all
+events looking for mmap records. The downside is that it can misresolve
+symbols if the workload binaries used when recording get locally rebuilt
+or upgraded, because the only key available in this case is the
+pathname. You can also set the "record.build-id" config variable to
+'skip' to have this behaviour permanently.
+
-N::
--no-buildid-cache::
Do not update the buildid cache. This saves some overhead in situations
where the information in the perf.data file (which includes buildids)
-is sufficient.
+is sufficient. You can also set the "record.build-id" config variable to
+'no-cache' to have the same effect.
-G name,...::
--cgroup name,...::
@@ -314,11 +326,17 @@ This option sets the time out limit. The default value is 500 ms.
Record context switch events i.e. events of type PERF_RECORD_SWITCH or
PERF_RECORD_SWITCH_CPU_WIDE.
---clang-path::
+--clang-path=PATH::
Path to clang binary to use for compiling BPF scriptlets.
+(enabled when BPF support is on)
---clang-opt::
+--clang-opt=OPTIONS::
Options passed to clang when compiling BPF scriptlets.
+(enabled when BPF support is on)
+
+--vmlinux=PATH::
+Specify vmlinux path which has debuginfo.
+(enabled when BPF prologue is on)
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 5ce8da1e1256..8a301f6afb37 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -117,6 +117,30 @@ OPTIONS
And default sort keys are changed to comm, dso_from, symbol_from, dso_to
and symbol_to, see '--branch-stack'.
+ If the data file has tracepoint event(s), following (dynamic) sort keys
+ are also available:
+ trace, trace_fields, [<event>.]<field>[/raw]
+
+ - trace: pretty printed trace output in a single column
+ - trace_fields: fields in tracepoints in separate columns
+ - <field name>: optional event and field name for a specific field
+
+ The last form consists of event and field names. If the event name is
+ omitted, all events are searched for a matching field name. The matched
+ field will be shown only for the event that has the field. The event
+ name supports substring matching, so the user doesn't need to specify
+ the full subsystem and event name every time. For example, the
+ 'sched:sched_switch' event can be shortened to 'switch' as long as it's
+ not ambiguous. An event can also be specified by its index (starting
+ from 1) preceded by '%', so '%1' is the first event, '%2' is the
+ second, and so on.
+
+ The field name can have a '/raw' suffix, which disables pretty printing
+ and shows the raw field value, e.g. as hex numbers. The --raw-trace
+ option has the same effect for all dynamic sort keys.
+
+ The default sort keys are changed to 'trace' if all events in the data
+ file are tracepoints.
+
-F::
--fields=::
Specify output field - multiple keys can be specified in CSV format.
@@ -170,17 +194,18 @@ OPTIONS
Dump raw trace in ASCII.
-g::
---call-graph=<print_type,threshold[,print_limit],order,sort_key,branch>::
+--call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>::
Display call chains using type, min percent threshold, print limit,
- call order, sort key and branch. Note that ordering of parameters is not
- fixed so any parement can be given in an arbitraty order. One exception
- is the print_limit which should be preceded by threshold.
+ call order, sort key, optional branch and value. Note that the ordering
+ of parameters is not fixed, so any parameter can be given in an arbitrary
+ order. One exception is the print_limit, which should be preceded by threshold.
print_type can be either:
- flat: single column, linear exposure of call chains.
- graph: use a graph tree, displaying absolute overhead rates. (default)
- fractal: like graph, but displays relative rates. Each branch of
the tree is considered as a new profiled object.
+ - folded: call chains are displayed in a line, separated by semicolons
- none: disable call chain display.
threshold is a percentage value which specifies a minimum percent to be
@@ -204,6 +229,11 @@ OPTIONS
- branch: include last branch information in callgraph when available.
Usually more convenient to use --branch-history for this.
+ value can be:
+ - percent: display overhead percent (default)
+ - period: display event period
+ - count: display event count
+
--children::
Accumulate callchain of children to parent entry so that they can
show up in the output. The output will have a new "Children" column
@@ -365,6 +395,9 @@ include::itrace.txt[]
--socket-filter::
Only report the samples on the processor socket that match with this filter
+--raw-trace::
+ When displaying traceevent output, do not use print fmt or plugins.
+
include::callchain-overhead-calculation.txt[]
SEE ALSO
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 4e074a660826..52ef7a9d50aa 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -10,6 +10,8 @@ SYNOPSIS
[verse]
'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] record [-o file] -- <command> [<options>]
+'perf stat' report [-i file]
DESCRIPTION
-----------
@@ -22,6 +24,11 @@ OPTIONS
<command>...::
Any command you can specify in a shell.
+record::
+ See STAT RECORD.
+
+report::
+ See STAT REPORT.
-e::
--event=::
@@ -159,6 +166,33 @@ filter out the startup phase of the program, which is often very different.
Print statistics of transactional execution if supported.
+STAT RECORD
+-----------
+Stores stat data into perf data file.
+
+-o file::
+--output file::
+Output file name.
+
+STAT REPORT
+-----------
+Reads and reports stat data from perf data file.
+
+-i file::
+--input file::
+Input file name.
+
+--per-socket::
+Aggregate counts per processor socket for system-wide mode measurements.
+
+--per-core::
+Aggregate counts per physical processor for system-wide mode measurements.
+
+-A::
+--no-aggr::
+Do not aggregate counts across all monitored CPUs.
+
+
EXAMPLES
--------
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 556cec09bf50..b0e60e17db38 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -230,6 +230,9 @@ Default is to monitor all CPUS.
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
Note that this feature may not be available on all processors.
+--raw-trace::
+ When displaying traceevent output, do not use print fmt or plugins.
+
INTERACTIVE PROMPTING KEYS
--------------------------
diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt
new file mode 100644
index 000000000000..a1c10e360db5
--- /dev/null
+++ b/tools/perf/Documentation/tips.txt
@@ -0,0 +1,14 @@
+For a higher level overview, try: perf report --sort comm,dso
+Sample related events with: perf record -e '{cycles,instructions}:S'
+Compare performance results with: perf diff [<old file> <new file>]
+Boolean options have negative forms, e.g.: perf report --no-children
+Customize output of perf script with: perf script -F event,ip,sym
+Generate a script for your data: perf script -g <lang>
+Save output of perf stat using: perf stat record <target workload>
+Create an archive with symtabs to analyse on another machine: perf archive
+Search options using a keyword: perf report -h <keyword>
+Use parent filter to see specific call path: perf report -p <regex>
+List events using substring match: perf list <keyword>
+To see list of saved events and attributes: perf evlist -v
+Use --symfs <dir> if your symbol files are in non-standard locations
+To see callchains in a more compact form: perf report -g folded
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 39c38cb45b00..ddf922f93aa1 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,6 +1,7 @@
tools/perf
tools/arch/alpha/include/asm/barrier.h
tools/arch/arm/include/asm/barrier.h
+tools/arch/arm64/include/asm/barrier.h
tools/arch/ia64/include/asm/barrier.h
tools/arch/mips/include/asm/barrier.h
tools/arch/powerpc/include/asm/barrier.h
@@ -20,14 +21,17 @@ tools/lib/traceevent
tools/lib/bpf
tools/lib/api
tools/lib/bpf
+tools/lib/subcmd
tools/lib/hweight.c
tools/lib/rbtree.c
+tools/lib/string.c
tools/lib/symbol/kallsyms.c
tools/lib/symbol/kallsyms.h
-tools/lib/util/find_next_bit.c
+tools/lib/find_bit.c
tools/include/asm/atomic.h
tools/include/asm/barrier.h
tools/include/asm/bug.h
+tools/include/asm-generic/atomic-gcc.h
tools/include/asm-generic/barrier.h
tools/include/asm-generic/bitops/arch_hweight.h
tools/include/asm-generic/bitops/atomic.h
@@ -50,6 +54,7 @@ tools/include/linux/log2.h
tools/include/linux/poison.h
tools/include/linux/rbtree.h
tools/include/linux/rbtree_augmented.h
+tools/include/linux/string.h
tools/include/linux/types.h
tools/include/linux/err.h
include/asm-generic/bitops/arch_hweight.h
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 0d19d5447d6c..0a22407e1d7d 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -145,9 +145,10 @@ BISON = bison
STRIP = strip
AWK = awk
-LIB_DIR = $(srctree)/tools/lib/api/
+LIB_DIR = $(srctree)/tools/lib/api/
TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
-BPF_DIR = $(srctree)/tools/lib/bpf/
+BPF_DIR = $(srctree)/tools/lib/bpf/
+SUBCMD_DIR = $(srctree)/tools/lib/subcmd/
# include config/Makefile by default and rule out
# non-config cases
@@ -184,15 +185,17 @@ strip-libs = $(filter-out -l%,$(1))
ifneq ($(OUTPUT),)
TE_PATH=$(OUTPUT)
BPF_PATH=$(OUTPUT)
+ SUBCMD_PATH=$(OUTPUT)
ifneq ($(subdir),)
- LIB_PATH=$(OUTPUT)/../lib/api/
+ API_PATH=$(OUTPUT)/../lib/api/
else
- LIB_PATH=$(OUTPUT)
+ API_PATH=$(OUTPUT)
endif
else
TE_PATH=$(TRACE_EVENT_DIR)
- LIB_PATH=$(LIB_DIR)
+ API_PATH=$(LIB_DIR)
BPF_PATH=$(BPF_DIR)
+ SUBCMD_PATH=$(SUBCMD_DIR)
endif
LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
@@ -201,11 +204,13 @@ export LIBTRACEEVENT
LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list
LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
-LIBAPI = $(LIB_PATH)libapi.a
+LIBAPI = $(API_PATH)libapi.a
export LIBAPI
LIBBPF = $(BPF_PATH)libbpf.a
+LIBSUBCMD = $(SUBCMD_PATH)libsubcmd.a
+
# python extension build directories
PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
@@ -257,7 +262,7 @@ export PERL_PATH
LIB_FILE=$(OUTPUT)libperf.a
-PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT)
+PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD)
ifndef NO_LIBBPF
PERFLIBS += $(LIBBPF)
endif
@@ -420,7 +425,7 @@ $(LIBTRACEEVENT)-clean:
$(call QUIET_CLEAN, libtraceevent)
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null
-install-traceevent-plugins: $(LIBTRACEEVENT)
+install-traceevent-plugins: libtraceevent_plugins
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins
$(LIBAPI): fixdep FORCE
@@ -431,12 +436,19 @@ $(LIBAPI)-clean:
$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
$(LIBBPF): fixdep FORCE
- $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a
+ $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(realpath $(OUTPUT)FEATURE-DUMP)
$(LIBBPF)-clean:
$(call QUIET_CLEAN, libbpf)
$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null
+$(LIBSUBCMD): fixdep FORCE
+ $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a
+
+$(LIBSUBCMD)-clean:
+ $(call QUIET_CLEAN, libsubcmd)
+ $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean
+
help:
@echo 'Perf make targets:'
@echo ' doc - make *all* documentation (see below)'
@@ -476,7 +488,7 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
$(DOC_TARGETS):
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
-TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol ../include ../lib/bpf
+TAG_FOLDERS= . ../lib ../include
TAG_FILES= ../../include/uapi/linux/perf_event.h
TAGS:
@@ -555,6 +567,9 @@ endif
$(call QUIET_INSTALL, perf_completion-script) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
$(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
+ $(call QUIET_INSTALL, perf-tip) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(tip_instdir_SQ)'; \
+ $(INSTALL) Documentation/tips.txt -t '$(DESTDIR_SQ)$(tip_instdir_SQ)'
install-tests: all install-gtk
$(call QUIET_INSTALL, tests) \
@@ -582,15 +597,16 @@ $(INSTALL_DOC_TARGETS):
#
config-clean:
$(call QUIET_CLEAN, config)
- $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
+ $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
-clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean config-clean
+clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
- $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+ $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected
$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32
$(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
- $(OUTPUT)util/intel-pt-decoder/inat-tables.c
+ $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \
+ $(OUTPUT)tests/llvm-src-{base,kbuild,prologue}.c
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
$(python-clean)
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 7ed00f4b0908..b48de2f5813c 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -2,10 +2,10 @@
#define ARCH_TESTS_H
/* Tests */
-int test__rdpmc(void);
-int test__perf_time_to_tsc(void);
-int test__insn_x86(void);
-int test__intel_cqm_count_nmi_context(void);
+int test__rdpmc(int subtest);
+int test__perf_time_to_tsc(int subtest);
+int test__insn_x86(int subtest);
+int test__intel_cqm_count_nmi_context(int subtest);
#ifdef HAVE_DWARF_UNWIND_SUPPORT
struct thread;
diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c
index b6115dfd28f0..08d9b2bc185c 100644
--- a/tools/perf/arch/x86/tests/insn-x86.c
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -171,7 +171,7 @@ static int test_data_set(struct test_data *dat_set, int x86_64)
* verbose (-v) option to see all the instructions and whether or not they
 * decoded successfully.
*/
-int test__insn_x86(void)
+int test__insn_x86(int subtest __maybe_unused)
{
int ret = 0;
diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c
index d28c1b6a3b54..3e89ba825f6b 100644
--- a/tools/perf/arch/x86/tests/intel-cqm.c
+++ b/tools/perf/arch/x86/tests/intel-cqm.c
@@ -33,7 +33,7 @@ static pid_t spawn(void)
* the last read counter value to avoid triggering a WARN_ON_ONCE() in
* smp_call_function_many() caused by sending IPIs from NMI context.
*/
-int test__intel_cqm_count_nmi_context(void)
+int test__intel_cqm_count_nmi_context(int subtest __maybe_unused)
{
struct perf_evlist *evlist = NULL;
struct perf_evsel *evsel = NULL;
@@ -54,7 +54,7 @@ int test__intel_cqm_count_nmi_context(void)
ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL);
if (ret) {
- pr_debug("parse_events failed\n");
+ pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n");
err = TEST_SKIP;
goto out;
}
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
index 658cd200af74..9d29ee283ac5 100644
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -35,13 +35,12 @@
* %0 is returned, otherwise %-1 is returned. If TSC conversion is not
 * supported then the test passes but " (not supported)" is printed.
*/
-int test__perf_time_to_tsc(void)
+int test__perf_time_to_tsc(int subtest __maybe_unused)
{
struct record_opts opts = {
.mmap_pages = UINT_MAX,
.user_freq = UINT_MAX,
.user_interval = ULLONG_MAX,
- .freq = 4000,
.target = {
.uses_mmap = true,
},
diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c
index e7688214c7cf..7bb0d13c235f 100644
--- a/tools/perf/arch/x86/tests/rdpmc.c
+++ b/tools/perf/arch/x86/tests/rdpmc.c
@@ -149,7 +149,7 @@ out_close:
return 0;
}
-int test__rdpmc(void)
+int test__rdpmc(int subtest __maybe_unused)
{
int status = 0;
int wret = 0;
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index ff63649fa9ac..465970370f3e 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -5,6 +5,7 @@ libperf-y += kvm-stat.o
libperf-y += perf_regs.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
+libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 9b94ce520917..8d8150f1cf9b 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -327,7 +327,7 @@ static int intel_bts_snapshot_start(struct auxtrace_record *itr)
evlist__for_each(btsr->evlist, evsel) {
if (evsel->attr.type == btsr->intel_bts_pmu->type)
- return perf_evlist__disable_event(btsr->evlist, evsel);
+ return perf_evsel__disable(evsel);
}
return -EINVAL;
}
@@ -340,7 +340,7 @@ static int intel_bts_snapshot_finish(struct auxtrace_record *itr)
evlist__for_each(btsr->evlist, evsel) {
if (evsel->attr.type == btsr->intel_bts_pmu->type)
- return perf_evlist__enable_event(btsr->evlist, evsel);
+ return perf_evsel__enable(evsel);
}
return -EINVAL;
}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index b02af064f0f9..f05daacc9e78 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -26,7 +26,7 @@
#include "../../util/evlist.h"
#include "../../util/evsel.h"
#include "../../util/cpumap.h"
-#include "../../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../../util/parse-events.h"
#include "../../util/pmu.h"
#include "../../util/debug.h"
@@ -725,7 +725,7 @@ static int intel_pt_snapshot_start(struct auxtrace_record *itr)
evlist__for_each(ptr->evlist, evsel) {
if (evsel->attr.type == ptr->intel_pt_pmu->type)
- return perf_evlist__disable_event(ptr->evlist, evsel);
+ return perf_evsel__disable(evsel);
}
return -EINVAL;
}
@@ -738,7 +738,7 @@ static int intel_pt_snapshot_finish(struct auxtrace_record *itr)
evlist__for_each(ptr->evlist, evsel) {
if (evsel->attr.type == ptr->intel_pt_pmu->type)
- return perf_evlist__enable_event(ptr->evlist, evsel);
+ return perf_evsel__enable(evsel);
}
return -EINVAL;
}
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index fc9bebd2cca0..0999ac536d86 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -11,7 +11,7 @@
#include "../perf.h"
#include "../util/util.h"
#include "../util/stat.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../util/header.h"
#include "bench.h"
#include "futex.h"
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index bc6a16adbca8..6a18ce21f865 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -5,7 +5,7 @@
#include "../perf.h"
#include "../util/util.h"
#include "../util/stat.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../util/header.h"
#include "bench.h"
#include "futex.h"
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index ad0d9b5342fb..718238683013 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -11,7 +11,7 @@
#include "../perf.h"
#include "../util/util.h"
#include "../util/stat.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../util/header.h"
#include "bench.h"
#include "futex.h"
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 6d8c9fa2a16c..91aaf2a1fa90 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -10,7 +10,7 @@
#include "../perf.h"
#include "../util/util.h"
#include "../util/stat.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../util/header.h"
#include "bench.h"
#include "futex.h"
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index e5e41d3bdce7..f416bd705f66 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -11,7 +11,7 @@
#include "../perf.h"
#include "../util/util.h"
#include "../util/stat.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../util/header.h"
#include "bench.h"
#include "futex.h"
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 9419b944220f..a91aa85d80ff 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -8,7 +8,7 @@
#include "../perf.h"
#include "../util/util.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../util/header.h"
#include "../util/cloexec.h"
#include "bench.h"
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 492df2752a2d..5049d6357a46 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -7,7 +7,7 @@
#include "../perf.h"
#include "../builtin.h"
#include "../util/util.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../util/cloexec.h"
#include "bench.h"
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index d4ff1b539cfd..bfaf9503de8e 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -11,7 +11,7 @@
#include "../perf.h"
#include "../util/util.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../builtin.h"
#include "bench.h"
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 005cc283790c..1dc2d13cc272 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -10,7 +10,7 @@
*/
#include "../perf.h"
#include "../util/util.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../builtin.h"
#include "bench.h"
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 2bf9b3fd9e61..cc5c1267c738 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -21,7 +21,7 @@
#include "util/evsel.h"
#include "util/annotate.h"
#include "util/event.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/thread.h"
#include "util/sort.h"
@@ -47,7 +47,7 @@ struct perf_annotate {
};
static int perf_evsel__add_sample(struct perf_evsel *evsel,
- struct perf_sample *sample __maybe_unused,
+ struct perf_sample *sample,
struct addr_location *al,
struct perf_annotate *ann)
{
@@ -72,7 +72,10 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
return 0;
}
- he = __hists__add_entry(hists, al, NULL, NULL, NULL, 1, 1, 0, true);
+ sample->period = 1;
+ sample->weight = 1;
+
+ he = __hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
if (he == NULL)
return -ENOMEM;
@@ -343,18 +346,19 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
return ret;
argc = parse_options(argc, argv, options, annotate_usage, 0);
+ if (argc) {
+ /*
+ * Special case: if there's an argument left then assume that
+ * it's a symbol filter:
+ */
+ if (argc > 1)
+ usage_with_options(annotate_usage, options);
- if (annotate.use_stdio)
- use_browser = 0;
- else if (annotate.use_tui)
- use_browser = 1;
- else if (annotate.use_gtk)
- use_browser = 2;
+ annotate.sym_hist_filter = argv[0];
+ }
file.path = input_name;
- setup_browser(true);
-
annotate.session = perf_session__new(&file, false, &annotate.tool);
if (annotate.session == NULL)
return -1;
@@ -366,19 +370,17 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
if (ret < 0)
goto out_delete;
- if (setup_sorting() < 0)
+ if (setup_sorting(NULL) < 0)
usage_with_options(annotate_usage, options);
- if (argc) {
- /*
- * Special case: if there's an argument left then assume that
- * it's a symbol filter:
- */
- if (argc > 1)
- usage_with_options(annotate_usage, options);
+ if (annotate.use_stdio)
+ use_browser = 0;
+ else if (annotate.use_tui)
+ use_browser = 1;
+ else if (annotate.use_gtk)
+ use_browser = 2;
- annotate.sym_hist_filter = argv[0];
- }
+ setup_browser(true);
ret = __cmd_annotate(&annotate);
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index b17aed36ca16..a1cddc6bbf0f 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -16,7 +16,7 @@
*/
#include "perf.h"
#include "util/util.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "builtin.h"
#include "bench/bench.h"
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 7b8450cd33c2..d93bff7fc0e4 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -16,7 +16,7 @@
#include "util/cache.h"
#include "util/debug.h"
#include "util/header.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/strlist.h"
#include "util/build-id.h"
#include "util/session.h"
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 918b4de29de4..5e914ee79eb3 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -12,7 +12,7 @@
#include "util/build-id.h"
#include "util/cache.h"
#include "util/debug.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/session.h"
#include "util/symbol.h"
#include "util/data.h"
@@ -110,7 +110,7 @@ int cmd_buildid_list(int argc, const char **argv,
setup_pager();
if (show_kernel)
- return sysfs__fprintf_build_id(stdout);
+ return !(sysfs__fprintf_build_id(stdout) > 0);
return perf_session__list_build_ids(force, with_hits);
}
diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
new file mode 100644
index 000000000000..f04e804a9fad
--- /dev/null
+++ b/tools/perf/builtin-config.c
@@ -0,0 +1,66 @@
+/*
+ * builtin-config.c
+ *
+ * Copyright (C) 2015, Taeung Song <treeze.taeung@gmail.com>
+ *
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/cache.h"
+#include <subcmd/parse-options.h>
+#include "util/util.h"
+#include "util/debug.h"
+
+static const char * const config_usage[] = {
+ "perf config [options]",
+ NULL
+};
+
+enum actions {
+ ACTION_LIST = 1
+} actions;
+
+static struct option config_options[] = {
+ OPT_SET_UINT('l', "list", &actions,
+ "show current config variables", ACTION_LIST),
+ OPT_END()
+};
+
+static int show_config(const char *key, const char *value,
+ void *cb __maybe_unused)
+{
+ if (value)
+ printf("%s=%s\n", key, value);
+ else
+ printf("%s\n", key);
+
+ return 0;
+}
+
+int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+ int ret = 0;
+
+ argc = parse_options(argc, argv, config_options, config_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ switch (actions) {
+ case ACTION_LIST:
+ if (argc) {
+ pr_err("Error: takes no arguments\n");
+ parse_options_usage(config_usage, config_options, "l", 1);
+ } else {
+ ret = perf_config(show_config, NULL);
+ if (ret < 0)
+ pr_err("Nothing configured, "
+ "please check your ~/.perfconfig file\n");
+ }
+ break;
+ default:
+ usage_with_options(config_usage, config_options);
+ }
+
+ return ret;
+}
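
perf_config() hands the callback dotted 'section.name' keys, so with the example ~/.perfconfig from the perf-config.txt documentation earlier in this patch, 'perf config -l' would print lines of the form (output sketched, not captured):

	colors.top=red, default
	tui.report=on
	buildid.dir=~/.debug
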
diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c
index d6525bc54d13..b97bc1518b44 100644
--- a/tools/perf/builtin-data.c
+++ b/tools/perf/builtin-data.c
@@ -2,7 +2,7 @@
#include "builtin.h"
#include "perf.h"
#include "debug.h"
-#include "parse-options.h"
+#include <subcmd/parse-options.h>
#include "data-convert-bt.h"
typedef int (*data_cmd_fn_t)(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 0b180a885ba3..36ccc2b8827f 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -311,11 +311,11 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair,
}
static int hists__add_entry(struct hists *hists,
- struct addr_location *al, u64 period,
- u64 weight, u64 transaction)
+ struct addr_location *al,
+ struct perf_sample *sample)
{
- if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight,
- transaction, true) != NULL)
+ if (__hists__add_entry(hists, al, NULL, NULL, NULL,
+ sample, true) != NULL)
return 0;
return -ENOMEM;
}
@@ -336,8 +336,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
return -1;
}
- if (hists__add_entry(hists, &al, sample->period,
- sample->weight, sample->transaction)) {
+ if (hists__add_entry(hists, &al, sample)) {
pr_warning("problem incrementing symbol period, skipping event\n");
goto out_put;
}
@@ -1208,7 +1207,7 @@ static int ui_init(void)
BUG_ON(1);
}
- list_add(&fmt->sort_list, &perf_hpp__sort_list);
+ perf_hpp__register_sort_field(fmt);
return 0;
}
@@ -1280,7 +1279,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
sort__mode = SORT_MODE__DIFF;
- if (setup_sorting() < 0)
+ if (setup_sorting(NULL) < 0)
usage_with_options(diff_usage, options);
setup_pager();
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index f4d62510acbb..8a31f511e1a0 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -12,7 +12,7 @@
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/parse-events.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/session.h"
#include "util/data.h"
#include "util/debug.h"
@@ -26,14 +26,22 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
.mode = PERF_DATA_MODE_READ,
.force = details->force,
};
+ bool has_tracepoint = false;
session = perf_session__new(&file, 0, NULL);
if (session == NULL)
return -1;
- evlist__for_each(session->evlist, pos)
+ evlist__for_each(session->evlist, pos) {
perf_evsel__fprintf(pos, details, stdout);
+ if (pos->attr.type == PERF_TYPE_TRACEPOINT)
+ has_tracepoint = true;
+ }
+
+ if (has_tracepoint && !details->trace_fields)
+ printf("# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events\n");
+
perf_session__delete(session);
return 0;
}
@@ -49,6 +57,7 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('g', "group", &details.event_group,
"Show event group information"),
OPT_BOOLEAN('f', "force", &details.force, "don't complain, do it"),
+ OPT_BOOLEAN(0, "trace-fields", &details.trace_fields, "Show tracepoint fields"),
OPT_END()
};
const char * const evlist_usage[] = {
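
The evlist loop above records has_tracepoint while printing, so the --trace-fields hint is emitted at most once, after all events. The same pattern in isolation, with a toy enum standing in for perf_event_attr.type:

    #include <stdbool.h>
    #include <stdio.h>

    enum ev_type { EV_HARDWARE, EV_TRACEPOINT };

    static void print_events(const enum ev_type *evs, int n, bool trace_fields)
    {
        bool has_tracepoint = false;
        int i;

        for (i = 0; i < n; i++) {
            printf("event %d\n", i);    /* perf_evsel__fprintf() stand-in */
            if (evs[i] == EV_TRACEPOINT)
                has_tracepoint = true;
        }

        if (has_tracepoint && !trace_fields)
            printf("# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events\n");
    }
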
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index a7d588bf3cdd..96c1a4cfbbbf 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -6,11 +6,11 @@
#include "perf.h"
#include "util/cache.h"
#include "builtin.h"
-#include "util/exec_cmd.h"
+#include <subcmd/exec-cmd.h>
#include "common-cmds.h"
-#include "util/parse-options.h"
-#include "util/run-command.h"
-#include "util/help.h"
+#include <subcmd/parse-options.h>
+#include <subcmd/run-command.h>
+#include <subcmd/help.h>
#include "util/debug.h"
static struct man_viewer_list {
@@ -407,7 +407,7 @@ static int get_html_page_path(struct strbuf *page_path, const char *page)
#ifndef open_html
static void open_html(const char *path)
{
- execl_perf_cmd("web--browse", "-c", "help.browser", path, NULL);
+ execl_cmd("web--browse", "-c", "help.browser", path, NULL);
}
#endif
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 0a945d2e8ca5..0022e02ed31a 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -18,7 +18,7 @@
#include "util/data.h"
#include "util/auxtrace.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include <linux/list.h>
@@ -675,6 +675,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
.fork = perf_event__repipe,
.exit = perf_event__repipe,
.lost = perf_event__repipe,
+ .lost_samples = perf_event__repipe,
.aux = perf_event__repipe,
.itrace_start = perf_event__repipe,
.context_switch = perf_event__repipe,
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 93ce665f976f..118010553d0c 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -12,7 +12,7 @@
#include "util/tool.h"
#include "util/callchain.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/data.h"
#include "util/cpumap.h"
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index dd94b4ca2213..4418d9214872 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -10,7 +10,7 @@
#include "util/header.h"
#include "util/session.h"
#include "util/intlist.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/debug.h"
#include "util/tool.h"
@@ -1351,7 +1351,6 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
disable_buildid_cache();
use_browser = 0;
- setup_browser(false);
if (argc) {
argc = parse_options(argc, argv, live_options,
@@ -1409,8 +1408,6 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
err = kvm_events_live_report(kvm);
out:
- exit_browser(0);
-
if (kvm->session)
perf_session__delete(kvm->session);
kvm->session = NULL;
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index bf679e2c978b..5e22db4684b8 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -14,7 +14,7 @@
#include "util/parse-events.h"
#include "util/cache.h"
#include "util/pmu.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
{
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index de16aaed516e..ce3bfb48b26f 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -9,7 +9,7 @@
#include "util/thread.h"
#include "util/header.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/debug.h"
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 80170aace5d4..390170041696 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -1,7 +1,7 @@
#include "builtin.h"
#include "perf.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/tool.h"
#include "util/session.h"
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 132afc97676c..9af859b28b15 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -37,7 +37,7 @@
#include "util/strfilter.h"
#include "util/symbol.h"
#include "util/debug.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/probe-finder.h"
#include "util/probe-event.h"
#include "util/probe-file.h"
@@ -249,6 +249,9 @@ static int opt_show_vars(const struct option *opt,
return ret;
}
+#else
+# define opt_show_lines NULL
+# define opt_show_vars NULL
#endif
static int opt_add_probe_event(const struct option *opt,
const char *str, int unset __maybe_unused)
@@ -473,7 +476,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
opt_add_probe_event),
OPT_BOOLEAN('f', "force", &probe_conf.force_add, "forcibly add events"
" with existing name"),
-#ifdef HAVE_DWARF_SUPPORT
OPT_CALLBACK('L', "line", NULL,
"FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]",
"Show source code lines.", opt_show_lines),
@@ -490,7 +492,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
"directory", "path to kernel source"),
OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines,
"Don't search inlined functions"),
-#endif
OPT__DRY_RUN(&probe_event_dry_run),
OPT_INTEGER('\0', "max-probes", &probe_conf.max_probes,
"Set how many probe points can be found for a probe."),
@@ -521,6 +522,16 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
#ifdef HAVE_DWARF_SUPPORT
set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE);
set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE);
+#else
+# define set_nobuild(s, l, c) set_option_nobuild(options, s, l, "NO_DWARF=1", c)
+ set_nobuild('L', "line", false);
+ set_nobuild('V', "vars", false);
+ set_nobuild('\0', "externs", false);
+ set_nobuild('\0', "range", false);
+ set_nobuild('k', "vmlinux", true);
+ set_nobuild('s', "source", true);
+ set_nobuild('\0', "no-inlines", true);
+# undef set_nobuild
#endif
set_option_flag(options, 'F', "funcs", PARSE_OPT_EXCLUSIVE);
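
The #else branch keeps the DWARF-only options visible in --help on NO_DWARF builds; set_option_nobuild() arranges for them to fail (or warn and skip, per the bool) with a pointed rebuild hint instead of an "unknown option" error. A sketch of that pattern over a simplified option struct — these are illustration types, not the real <subcmd/parse-options.h> API:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    struct opt {
        const char *name;               /* NULL-named entry terminates */
        const char *missing_build_opt;  /* NULL while the option is usable */
        bool can_skip;                  /* warn instead of erroring out */
    };

    static void mark_nobuild(struct opt *opts, const char *name,
                             const char *build_opt, bool can_skip)
    {
        for (; opts->name; opts++) {
            if (!strcmp(opts->name, name)) {
                opts->missing_build_opt = build_opt;
                opts->can_skip = can_skip;
                return;
            }
        }
    }

    static int use_option(const struct opt *o)
    {
        if (o->missing_build_opt) {
            fprintf(stderr, "option --%s not available: built with %s%s\n",
                    o->name, o->missing_build_opt,
                    o->can_skip ? ", skipping" : "");
            return o->can_skip ? 0 : -1;
        }
        return 0;
    }
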
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 199fc31e3919..dc4e0adf5c5b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -11,7 +11,7 @@
#include "util/build-id.h"
#include "util/util.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/callchain.h"
@@ -452,6 +452,8 @@ static void record__init_features(struct record *rec)
if (!rec->opts.full_auxtrace)
perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
+
+ perf_header__clear_feat(&session->header, HEADER_STAT);
}
static volatile int workload_exec_errno;
@@ -813,8 +815,12 @@ int record_parse_callchain_opt(const struct option *opt,
}
ret = parse_callchain_record_opt(arg, &callchain_param);
- if (!ret)
+ if (!ret) {
+ /* Enable data address sampling for DWARF unwind. */
+ if (callchain_param.record_mode == CALLCHAIN_DWARF)
+ record->sample_address = true;
callchain_debug();
+ }
return ret;
}
@@ -837,6 +843,19 @@ int record_callchain_opt(const struct option *opt,
static int perf_record_config(const char *var, const char *value, void *cb)
{
+ struct record *rec = cb;
+
+ if (!strcmp(var, "record.build-id")) {
+ if (!strcmp(value, "cache"))
+ rec->no_buildid_cache = false;
+ else if (!strcmp(value, "no-cache"))
+ rec->no_buildid_cache = true;
+ else if (!strcmp(value, "skip"))
+ rec->no_buildid = true;
+ else
+ return -1;
+ return 0;
+ }
if (!strcmp(var, "record.call-graph"))
var = "call-graph.record-mode"; /* fall-through */
@@ -1113,12 +1132,12 @@ struct option __record_options[] = {
"per thread proc mmap processing timeout in ms"),
OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
"Record context switch events"),
-#ifdef HAVE_LIBBPF_SUPPORT
OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
"clang binary to use for compiling BPF scriptlets"),
OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
"options passed to clang when compiling BPF scriptlets"),
-#endif
+ OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
OPT_END()
};
@@ -1130,6 +1149,27 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
struct record *rec = &record;
char errbuf[BUFSIZ];
+#ifndef HAVE_LIBBPF_SUPPORT
+# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
+ set_nobuild('\0', "clang-path", true);
+ set_nobuild('\0', "clang-opt", true);
+# undef set_nobuild
+#endif
+
+#ifndef HAVE_BPF_PROLOGUE
+# if !defined (HAVE_DWARF_SUPPORT)
+# define REASON "NO_DWARF=1"
+# elif !defined (HAVE_LIBBPF_SUPPORT)
+# define REASON "NO_LIBBPF=1"
+# else
+# define REASON "this architecture doesn't support BPF prologue"
+# endif
+# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
+ set_nobuild('\0', "vmlinux", true);
+# undef set_nobuild
+# undef REASON
+#endif
+
rec->evlist = perf_evlist__new();
if (rec->evlist == NULL)
return -ENOMEM;
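
The HAVE_BPF_PROLOGUE block picks the most specific explanation it can report: missing DWARF wins over missing libbpf, which wins over an unsupported architecture. The same cascade as a self-contained compile-time demo (the two HAVE_* macros stand in for perf's feature tests):

    #include <stdio.h>

    /* Define one or both of these to see which reason wins. */
    /* #define HAVE_DWARF_SUPPORT 1 */
    /* #define HAVE_LIBBPF_SUPPORT 1 */

    #if !defined(HAVE_DWARF_SUPPORT)
    # define REASON "NO_DWARF=1"
    #elif !defined(HAVE_LIBBPF_SUPPORT)
    # define REASON "NO_LIBBPF=1"
    #else
    # define REASON "this architecture doesn't support BPF prologue"
    #endif

    int main(void)
    {
        printf("BPF prologue disabled: %s\n", REASON);
        return 0;
    }
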
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2853ad2bd435..d5a42ee12529 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -27,7 +27,7 @@
#include "util/session.h"
#include "util/tool.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/thread.h"
@@ -44,8 +44,7 @@
struct report {
struct perf_tool tool;
struct perf_session *session;
- bool force, use_tui, use_gtk, use_stdio;
- bool hide_unresolved;
+ bool use_tui, use_gtk, use_stdio;
bool dont_use_callchains;
bool show_full_info;
bool show_threads;
@@ -146,7 +145,7 @@ static int process_sample_event(struct perf_tool *tool,
struct hist_entry_iter iter = {
.evsel = evsel,
.sample = sample,
- .hide_unresolved = rep->hide_unresolved,
+ .hide_unresolved = symbol_conf.hide_unresolved,
.add_entry_cb = hist_iter__report_callback,
};
int ret = 0;
@@ -157,7 +156,7 @@ static int process_sample_event(struct perf_tool *tool,
return -1;
}
- if (rep->hide_unresolved && al.sym == NULL)
+ if (symbol_conf.hide_unresolved && al.sym == NULL)
goto out_put;
if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
@@ -434,7 +433,7 @@ static int report__browse_hists(struct report *rep)
int ret;
struct perf_session *session = rep->session;
struct perf_evlist *evlist = session->evlist;
- const char *help = "For a higher level overview, try: perf report --sort comm,dso";
+ const char *help = perf_tip(TIPDIR);
switch (use_browser) {
case 1:
@@ -514,20 +513,26 @@ static int __cmd_report(struct report *rep)
if (rep->cpu_list) {
ret = perf_session__cpu_bitmap(session, rep->cpu_list,
rep->cpu_bitmap);
- if (ret)
+ if (ret) {
+ ui__error("failed to set cpu bitmap\n");
return ret;
+ }
}
if (rep->show_threads)
perf_read_values_init(&rep->show_threads_values);
ret = report__setup_sample_type(rep);
- if (ret)
+ if (ret) {
+ /* report__setup_sample_type() already showed error message */
return ret;
+ }
ret = perf_session__process_events(session);
- if (ret)
+ if (ret) {
+ ui__error("failed to process sample\n");
return ret;
+ }
report__warn_kptr_restrict(rep);
@@ -625,7 +630,7 @@ parse_percent_limit(const struct option *opt, const char *str,
return 0;
}
-#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function"
+#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
CALLCHAIN_REPORT_HELP
@@ -678,7 +683,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
"file", "vmlinux pathname"),
OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
"file", "kallsyms pathname"),
- OPT_BOOLEAN('f', "force", &report.force, "don't complain, do it"),
+ OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
"load module symbols - WARNING: use only with -k and LIVE kernel"),
OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
@@ -708,7 +713,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
"Only display entries with parent-match"),
OPT_CALLBACK_DEFAULT('g', "call-graph", &report,
- "print_type,threshold[,print_limit],order,sort_key[,branch]",
+ "print_type,threshold[,print_limit],order,sort_key[,branch],value",
report_callchain_help, &report_parse_callchain_opt,
callchain_default_opt),
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
@@ -740,7 +745,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator",
"separator for columns, no spaces will be added between "
"columns '.' is reserved."),
- OPT_BOOLEAN('U', "hide-unresolved", &report.hide_unresolved,
+ OPT_BOOLEAN('U', "hide-unresolved", &symbol_conf.hide_unresolved,
"Only display entries resolved to a symbol"),
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
@@ -783,6 +788,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
"Show callgraph from reference event"),
OPT_INTEGER(0, "socket-filter", &report.socket_filter,
"only show processor socket that match with this filter"),
+ OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
+ "Show raw trace event output (do not use print fmt or plugins)"),
OPT_END()
};
struct perf_data_file file = {
@@ -796,6 +803,16 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
perf_config(report__config, &report);
argc = parse_options(argc, argv, options, report_usage, 0);
+ if (argc) {
+ /*
+ * Special case: if there's an argument left then assume that
+ * it's a symbol filter:
+ */
+ if (argc > 1)
+ usage_with_options(report_usage, options);
+
+ report.symbol_filter_str = argv[0];
+ }
if (symbol_conf.vmlinux_name &&
access(symbol_conf.vmlinux_name, R_OK)) {
@@ -832,7 +849,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
}
file.path = input_name;
- file.force = report.force;
+ file.force = symbol_conf.force;
repeat:
session = perf_session__new(&file, false, &report.tool);
@@ -882,7 +899,7 @@ repeat:
symbol_conf.cumulate_callchain = false;
}
- if (setup_sorting() < 0) {
+ if (setup_sorting(session->evlist) < 0) {
if (sort_order)
parse_options_usage(report_usage, options, "s", 1);
if (field_order)
@@ -941,17 +958,6 @@ repeat:
if (symbol__init(&session->header.env) < 0)
goto error;
- if (argc) {
- /*
- * Special case: if there's an argument left then assume that
- * it's a symbol filter:
- */
- if (argc > 1)
- usage_with_options(report_usage, options);
-
- report.symbol_filter_str = argv[0];
- }
-
sort__setup_elide(stdout);
ret = __cmd_report(&report);
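
report__browse_hists() now asks perf_tip(TIPDIR) for its status line instead of hardcoding one suggestion. A hypothetical sketch of such a helper, assuming tips live one per line in a text file under that directory (the on-disk layout here is an assumption, not the real util code), with the old fixed string kept as the fallback:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>

    static char *random_tip(const char *path)
    {
        FILE *fp = fopen(path, "r");
        char line[256], *tip = NULL;
        int n = 0;

        if (!fp)
            return strdup("For a higher level overview, try: perf report --sort comm,dso");

        srand(time(NULL));
        while (fgets(line, sizeof(line), fp)) {
            /* Reservoir sampling: the n-th line replaces the pick
             * with probability 1/n, giving a uniform choice. */
            if (rand() % ++n == 0) {
                line[strcspn(line, "\n")] = '\0';
                free(tip);
                tip = strdup(line);
            }
        }
        fclose(fp);
        return tip;     /* caller frees */
    }
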
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index e3d3e32c0a93..871b55ae22a4 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -12,7 +12,7 @@
#include "util/tool.h"
#include "util/cloexec.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/debug.h"
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 72b5deb4bd79..c691214d820f 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -3,9 +3,9 @@
#include "perf.h"
#include "util/cache.h"
#include "util/debug.h"
-#include "util/exec_cmd.h"
+#include <subcmd/exec-cmd.h>
#include "util/header.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/perf_regs.h"
#include "util/session.h"
#include "util/tool.h"
@@ -18,7 +18,11 @@
#include "util/sort.h"
#include "util/data.h"
#include "util/auxtrace.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/stat.h"
#include <linux/bitmap.h>
+#include "asm/bug.h"
static char const *script_name;
static char const *generate_script_lang;
@@ -32,6 +36,7 @@ static bool print_flags;
static bool nanosecs;
static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+static struct perf_stat_config stat_config;
unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
@@ -130,6 +135,18 @@ static struct {
.invalid_fields = PERF_OUTPUT_TRACE,
},
+
+ [PERF_TYPE_BREAKPOINT] = {
+ .user_set = false,
+
+ .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+ PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+ PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+ PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
+ PERF_OUTPUT_PERIOD,
+
+ .invalid_fields = PERF_OUTPUT_TRACE,
+ },
};
static bool output_set_by_user(void)
@@ -204,6 +221,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
struct perf_event_attr *attr = &evsel->attr;
bool allow_user_set;
+ if (perf_header__has_feat(&session->header, HEADER_STAT))
+ return 0;
+
allow_user_set = perf_header__has_feat(&session->header,
HEADER_AUXTRACE);
@@ -588,8 +608,35 @@ static void print_sample_flags(u32 flags)
printf(" %-4s ", str);
}
-static void process_event(union perf_event *event, struct perf_sample *sample,
- struct perf_evsel *evsel, struct addr_location *al)
+struct perf_script {
+ struct perf_tool tool;
+ struct perf_session *session;
+ bool show_task_events;
+ bool show_mmap_events;
+ bool show_switch_events;
+ bool allocated;
+ struct cpu_map *cpus;
+ struct thread_map *threads;
+ int name_width;
+};
+
+static int perf_evlist__max_name_len(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ int max = 0;
+
+ evlist__for_each(evlist, evsel) {
+ int len = strlen(perf_evsel__name(evsel));
+
+ max = MAX(len, max);
+ }
+
+ return max;
+}
+
+static void process_event(struct perf_script *script, union perf_event *event,
+ struct perf_sample *sample, struct perf_evsel *evsel,
+ struct addr_location *al)
{
struct thread *thread = al->thread;
struct perf_event_attr *attr = &evsel->attr;
@@ -604,7 +651,12 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
if (PRINT_FIELD(EVNAME)) {
const char *evname = perf_evsel__name(evsel);
- printf("%s: ", evname ? evname : "[unknown]");
+
+ if (!script->name_width)
+ script->name_width = perf_evlist__max_name_len(script->session->evlist);
+
+ printf("%*s: ", script->name_width,
+ evname ? evname : "[unknown]");
}
if (print_flags)
@@ -643,65 +695,81 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
printf("\n");
}
-static int default_start_script(const char *script __maybe_unused,
- int argc __maybe_unused,
- const char **argv __maybe_unused)
-{
- return 0;
-}
+static struct scripting_ops *scripting_ops;
-static int default_flush_script(void)
+static void __process_stat(struct perf_evsel *counter, u64 tstamp)
{
- return 0;
+ int nthreads = thread_map__nr(counter->threads);
+ int ncpus = perf_evsel__nr_cpus(counter);
+ int cpu, thread;
+ static int header_printed;
+
+ if (counter->system_wide)
+ nthreads = 1;
+
+ if (!header_printed) {
+ printf("%3s %8s %15s %15s %15s %15s %s\n",
+ "CPU", "THREAD", "VAL", "ENA", "RUN", "TIME", "EVENT");
+ header_printed = 1;
+ }
+
+ for (thread = 0; thread < nthreads; thread++) {
+ for (cpu = 0; cpu < ncpus; cpu++) {
+ struct perf_counts_values *counts;
+
+ counts = perf_counts(counter->counts, cpu, thread);
+
+ printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n",
+ counter->cpus->map[cpu],
+ thread_map__pid(counter->threads, thread),
+ counts->val,
+ counts->ena,
+ counts->run,
+ tstamp,
+ perf_evsel__name(counter));
+ }
+ }
}
-static int default_stop_script(void)
+static void process_stat(struct perf_evsel *counter, u64 tstamp)
{
- return 0;
+ if (scripting_ops && scripting_ops->process_stat)
+ scripting_ops->process_stat(&stat_config, counter, tstamp);
+ else
+ __process_stat(counter, tstamp);
}
-static int default_generate_script(struct pevent *pevent __maybe_unused,
- const char *outfile __maybe_unused)
+static void process_stat_interval(u64 tstamp)
{
- return 0;
+ if (scripting_ops && scripting_ops->process_stat_interval)
+ scripting_ops->process_stat_interval(tstamp);
}
-static struct scripting_ops default_scripting_ops = {
- .start_script = default_start_script,
- .flush_script = default_flush_script,
- .stop_script = default_stop_script,
- .process_event = process_event,
- .generate_script = default_generate_script,
-};
-
-static struct scripting_ops *scripting_ops;
-
static void setup_scripting(void)
{
setup_perl_scripting();
setup_python_scripting();
-
- scripting_ops = &default_scripting_ops;
}
static int flush_scripting(void)
{
- return scripting_ops->flush_script();
+ return scripting_ops ? scripting_ops->flush_script() : 0;
}
static int cleanup_scripting(void)
{
pr_debug("\nperf script stopped\n");
- return scripting_ops->stop_script();
+ return scripting_ops ? scripting_ops->stop_script() : 0;
}
-static int process_sample_event(struct perf_tool *tool __maybe_unused,
+static int process_sample_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine)
{
+ struct perf_script *scr = container_of(tool, struct perf_script, tool);
struct addr_location al;
if (debug_mode) {
@@ -727,20 +795,16 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
goto out_put;
- scripting_ops->process_event(event, sample, evsel, &al);
+ if (scripting_ops)
+ scripting_ops->process_event(event, sample, evsel, &al);
+ else
+ process_event(scr, event, sample, evsel, &al);
+
out_put:
addr_location__put(&al);
return 0;
}
-struct perf_script {
- struct perf_tool tool;
- struct perf_session *session;
- bool show_task_events;
- bool show_mmap_events;
- bool show_switch_events;
-};
-
static int process_attr(struct perf_tool *tool, union perf_event *event,
struct perf_evlist **pevlist)
{
@@ -1156,6 +1220,8 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
type = PERF_TYPE_TRACEPOINT;
else if (!strcmp(str, "raw"))
type = PERF_TYPE_RAW;
+ else if (!strcmp(str, "break"))
+ type = PERF_TYPE_BREAKPOINT;
else {
fprintf(stderr, "Invalid event type in field string.\n");
rc = -EINVAL;
@@ -1421,7 +1487,7 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
char first_half[BUFSIZ];
char *script_root;
- snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
+ snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
scripts_dir = opendir(scripts_path);
if (!scripts_dir)
@@ -1542,7 +1608,7 @@ int find_scripts(char **scripts_array, char **scripts_path_array)
if (!session)
return -1;
- snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
+ snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
scripts_dir = opendir(scripts_path);
if (!scripts_dir) {
@@ -1600,7 +1666,7 @@ static char *get_script_path(const char *script_root, const char *suffix)
char lang_path[MAXPATHLEN];
char *__script_root;
- snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
+ snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
scripts_dir = opendir(scripts_path);
if (!scripts_dir)
@@ -1695,6 +1761,87 @@ static void script__setup_sample_type(struct perf_script *script)
}
}
+static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ struct stat_round_event *round = &event->stat_round;
+ struct perf_evsel *counter;
+
+ evlist__for_each(session->evlist, counter) {
+ perf_stat_process_counter(&stat_config, counter);
+ process_stat(counter, round->time);
+ }
+
+ process_stat_interval(round->time);
+ return 0;
+}
+
+static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ perf_event__read_stat_config(&stat_config, &event->stat_config);
+ return 0;
+}
+
+static int set_maps(struct perf_script *script)
+{
+ struct perf_evlist *evlist = script->session->evlist;
+
+ if (!script->cpus || !script->threads)
+ return 0;
+
+ if (WARN_ONCE(script->allocated, "stats double allocation\n"))
+ return -EINVAL;
+
+ perf_evlist__set_maps(evlist, script->cpus, script->threads);
+
+ if (perf_evlist__alloc_stats(evlist, true))
+ return -ENOMEM;
+
+ script->allocated = true;
+ return 0;
+}
+
+static
+int process_thread_map_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+
+ if (script->threads) {
+ pr_warning("Extra thread map event, ignoring.\n");
+ return 0;
+ }
+
+ script->threads = thread_map__new_event(&event->thread_map);
+ if (!script->threads)
+ return -ENOMEM;
+
+ return set_maps(script);
+}
+
+static
+int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+
+ if (script->cpus) {
+ pr_warning("Extra cpu map event, ignoring.\n");
+ return 0;
+ }
+
+ script->cpus = cpu_map__new_data(&event->cpu_map.data);
+ if (!script->cpus)
+ return -ENOMEM;
+
+ return set_maps(script);
+}
+
int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
{
bool show_full_info = false;
@@ -1723,6 +1870,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
.auxtrace_info = perf_event__process_auxtrace_info,
.auxtrace = perf_event__process_auxtrace,
.auxtrace_error = perf_event__process_auxtrace_error,
+ .stat = perf_event__process_stat_event,
+ .stat_round = process_stat_round_event,
+ .stat_config = process_stat_config_event,
+ .thread_map = process_thread_map_event,
+ .cpu_map = process_cpu_map_event,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
@@ -1836,7 +1988,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
scripting_max_stack = itrace_synth_opts.callchain_sz;
/* make sure PERF_EXEC_PATH is set for scripts */
- perf_set_argv_exec_path(perf_exec_path());
+ set_argv_exec_path(get_argv_exec_path());
if (argc && !script_name && !rec_script_path && !rep_script_path) {
int live_pipe[2];
@@ -2076,6 +2228,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
flush_scripting();
out_delete:
+ perf_evlist__free_stats(session->evlist);
perf_session__delete(session);
if (script_started)
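
process_thread_map_event() and process_cpu_map_event() can arrive in either order in the stat data stream, so set_maps() allocates the counting buffers only once both maps are present, and the WARN_ONCE guards against a second allocation. The synchronization pattern on its own:

    #include <stdbool.h>
    #include <stdio.h>

    struct script_state {
        void *cpus;         /* set by the cpu map handler */
        void *threads;      /* set by the thread map handler */
        bool allocated;
    };

    /* Called from both handlers; whichever map lands second triggers
     * the allocation. */
    static int try_alloc_stats(struct script_state *s)
    {
        if (!s->cpus || !s->threads)
            return 0;               /* still waiting for the other map */

        if (s->allocated) {
            fprintf(stderr, "stats double allocation\n");
            return -1;
        }

        /* perf_evlist__alloc_stats() would run here. */
        s->allocated = true;
        return 0;
    }
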
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e77880b5094d..7f568244662b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -45,7 +45,7 @@
#include "builtin.h"
#include "util/cgroup.h"
#include "util/util.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/pmu.h"
#include "util/event.h"
@@ -59,6 +59,9 @@
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/counts.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "asm/bug.h"
#include <stdlib.h>
#include <sys/prctl.h>
@@ -126,6 +129,21 @@ static bool append_file;
static const char *output_name;
static int output_fd;
+struct perf_stat {
+ bool record;
+ struct perf_data_file file;
+ struct perf_session *session;
+ u64 bytes_written;
+ struct perf_tool tool;
+ bool maps_allocated;
+ struct cpu_map *cpus;
+ struct thread_map *threads;
+ enum aggr_mode aggr_mode;
+};
+
+static struct perf_stat perf_stat;
+#define STAT_RECORD perf_stat.record
+
static volatile int done = 0;
static struct perf_stat_config stat_config = {
@@ -161,15 +179,43 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
attr->inherit = !no_inherit;
- if (target__has_cpu(&target))
- return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
+ /*
+ * Some events get initialized with sample_(period/type) set,
+ * like tracepoints. Clear them for counting.
+ */
+ attr->sample_period = 0;
+
+ /*
+ * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be
+ * harmless and avoids confusing messages in older tools.
+ *
+ * However for pipe sessions we need to keep it zero,
+ * because script's perf_evsel__check_attr is triggered
+ * by attr->sample_type != 0, and we can't run it on
+ * stat sessions.
+ */
+ if (!(STAT_RECORD && perf_stat.file.is_pipe))
+ attr->sample_type = PERF_SAMPLE_IDENTIFIER;
- if (!target__has_task(&target) && perf_evsel__is_group_leader(evsel)) {
+ /*
+ * Disable all counters initially; they will be enabled
+ * either manually by us or by the kernel via enable_on_exec
+ * set later.
+ */
+ if (perf_evsel__is_group_leader(evsel)) {
attr->disabled = 1;
- if (!initial_delay)
+
+ /*
+ * In case of initial_delay we enable tracee
+ * events manually.
+ */
+ if (target__none(&target) && !initial_delay)
attr->enable_on_exec = 1;
}
+ if (target__has_cpu(&target))
+ return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
+
return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}
@@ -185,6 +231,42 @@ static inline int nsec_counter(struct perf_evsel *evsel)
return 0;
}
+static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) {
+ pr_err("failed to write perf data, error: %m\n");
+ return -1;
+ }
+
+ perf_stat.bytes_written += event->header.size;
+ return 0;
+}
+
+static int write_stat_round_event(u64 tm, u64 type)
+{
+ return perf_event__synthesize_stat_round(NULL, tm, type,
+ process_synthesized_event,
+ NULL);
+}
+
+#define WRITE_STAT_ROUND_EVENT(time, interval) \
+ write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
+
+#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+
+static int
+perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
+ struct perf_counts_values *count)
+{
+ struct perf_sample_id *sid = SID(counter, cpu, thread);
+
+ return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
+ process_synthesized_event, NULL);
+}
+
/*
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
@@ -208,6 +290,13 @@ static int read_counter(struct perf_evsel *counter)
count = perf_counts(counter->counts, cpu, thread);
if (perf_evsel__read(counter, cpu, thread, count))
return -1;
+
+ if (STAT_RECORD) {
+ if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+ pr_err("failed to write stat event\n");
+ return -1;
+ }
+ }
}
}
@@ -241,21 +330,26 @@ static void process_interval(void)
clock_gettime(CLOCK_MONOTONIC, &ts);
diff_timespec(&rs, &ts, &ref_time);
+ if (STAT_RECORD) {
+ if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL))
+ pr_err("failed to write stat round event\n");
+ }
+
print_counters(&rs, 0, NULL);
}
-static void handle_initial_delay(void)
+static void enable_counters(void)
{
- struct perf_evsel *counter;
-
- if (initial_delay) {
- const int ncpus = cpu_map__nr(evsel_list->cpus),
- nthreads = thread_map__nr(evsel_list->threads);
-
+ if (initial_delay)
usleep(initial_delay * 1000);
- evlist__for_each(evsel_list, counter)
- perf_evsel__enable(counter, ncpus, nthreads);
- }
+
+ /*
+ * We need to enable counters manually only if:
+ * - we have no forked tracee (i.e. we attached to existing tasks or cpus)
+ * - we have an initial delay configured
+ */
+ if (!target__none(&target) || initial_delay)
+ perf_evlist__enable(evsel_list);
}
static volatile int workload_exec_errno;
@@ -271,6 +365,135 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
workload_exec_errno = info->si_value.sival_int;
}
+static bool has_unit(struct perf_evsel *counter)
+{
+ return counter->unit && *counter->unit;
+}
+
+static bool has_scale(struct perf_evsel *counter)
+{
+ return counter->scale != 1;
+}
+
+static int perf_stat_synthesize_config(bool is_pipe)
+{
+ struct perf_evsel *counter;
+ int err;
+
+ if (is_pipe) {
+ err = perf_event__synthesize_attrs(NULL, perf_stat.session,
+ process_synthesized_event);
+ if (err < 0) {
+ pr_err("Couldn't synthesize attrs.\n");
+ return err;
+ }
+ }
+
+ /*
+ * Synthesize the other event attributes not carried within
+ * the attr event: unit, scale, name.
+ */
+ evlist__for_each(evsel_list, counter) {
+ if (!counter->supported)
+ continue;
+
+ /*
+ * Synthesize unit and scale only if it's defined.
+ */
+ if (has_unit(counter)) {
+ err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel unit.\n");
+ return err;
+ }
+ }
+
+ if (has_scale(counter)) {
+ err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel scale.\n");
+ return err;
+ }
+ }
+
+ if (counter->own_cpus) {
+ err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel scale.\n");
+ return err;
+ }
+ }
+
+ /*
+ * Name is needed only for pipe output,
+ * perf.data carries event names.
+ */
+ if (is_pipe) {
+ err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel name.\n");
+ return err;
+ }
+ }
+ }
+
+ err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
+ process_synthesized_event,
+ NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize thread map.\n");
+ return err;
+ }
+
+ err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
+ process_synthesized_event, NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize thread map.\n");
+ return err;
+ }
+
+ err = perf_event__synthesize_stat_config(NULL, &stat_config,
+ process_synthesized_event, NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize config.\n");
+ return err;
+ }
+
+ return 0;
+}
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+
+static int __store_counter_ids(struct perf_evsel *counter,
+ struct cpu_map *cpus,
+ struct thread_map *threads)
+{
+ int cpu, thread;
+
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ for (thread = 0; thread < threads->nr; thread++) {
+ int fd = FD(counter, cpu, thread);
+
+ if (perf_evlist__id_add_fd(evsel_list, counter,
+ cpu, thread, fd) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int store_counter_ids(struct perf_evsel *counter)
+{
+ struct cpu_map *cpus = counter->cpus;
+ struct thread_map *threads = counter->threads;
+
+ if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
+ return -ENOMEM;
+
+ return __store_counter_ids(counter, cpus, threads);
+}
+
static int __run_perf_stat(int argc, const char **argv)
{
int interval = stat_config.interval;
@@ -281,6 +504,7 @@ static int __run_perf_stat(int argc, const char **argv)
size_t l;
int status = 0;
const bool forks = (argc > 0);
+ bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;
if (interval) {
ts.tv_sec = interval / 1000;
@@ -291,7 +515,7 @@ static int __run_perf_stat(int argc, const char **argv)
}
if (forks) {
- if (perf_evlist__prepare_workload(evsel_list, &target, argv, false,
+ if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
workload_exec_failed_signal) < 0) {
perror("failed to prepare workload");
return -1;
@@ -335,6 +559,9 @@ static int __run_perf_stat(int argc, const char **argv)
l = strlen(counter->unit);
if (l > unit_width)
unit_width = l;
+
+ if (STAT_RECORD && store_counter_ids(counter))
+ return -1;
}
if (perf_evlist__apply_filters(evsel_list, &counter)) {
@@ -344,6 +571,24 @@ static int __run_perf_stat(int argc, const char **argv)
return -1;
}
+ if (STAT_RECORD) {
+ int err, fd = perf_data_file__fd(&perf_stat.file);
+
+ if (is_pipe) {
+ err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file));
+ } else {
+ err = perf_session__write_header(perf_stat.session, evsel_list,
+ fd, false);
+ }
+
+ if (err < 0)
+ return err;
+
+ err = perf_stat_synthesize_config(is_pipe);
+ if (err < 0)
+ return err;
+ }
+
/*
* Enable counters and exec the command:
*/
@@ -352,7 +597,7 @@ static int __run_perf_stat(int argc, const char **argv)
if (forks) {
perf_evlist__start_workload(evsel_list);
- handle_initial_delay();
+ enable_counters();
if (interval) {
while (!waitpid(child_pid, &status, WNOHANG)) {
@@ -371,7 +616,7 @@ static int __run_perf_stat(int argc, const char **argv)
if (WIFSIGNALED(status))
psignal(WTERMSIG(status), argv[0]);
} else {
- handle_initial_delay();
+ enable_counters();
while (!done) {
nanosleep(&ts, NULL);
if (interval)
@@ -810,8 +1055,8 @@ static void print_header(int argc, const char **argv)
else if (target.cpu_list)
fprintf(output, "\'CPU(s) %s", target.cpu_list);
else if (!target__has_task(&target)) {
- fprintf(output, "\'%s", argv[0]);
- for (i = 1; i < argc; i++)
+ fprintf(output, "\'%s", argv ? argv[0] : "pipe");
+ for (i = 1; argv && (i < argc); i++)
fprintf(output, " %s", argv[i]);
} else if (target.pid)
fprintf(output, "process id \'%s", target.pid);
@@ -847,6 +1092,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
struct perf_evsel *counter;
char buf[64], *prefix = NULL;
+ /* Do not print anything if we record to the pipe. */
+ if (STAT_RECORD && perf_stat.file.is_pipe)
+ return;
+
if (interval)
print_interval(prefix = buf, ts);
else
@@ -1077,6 +1326,109 @@ static int perf_stat_init_aggr_mode(void)
return cpus_aggr_map ? 0 : -ENOMEM;
}
+static void perf_stat__exit_aggr_mode(void)
+{
+ cpu_map__put(aggr_map);
+ cpu_map__put(cpus_aggr_map);
+ aggr_map = NULL;
+ cpus_aggr_map = NULL;
+}
+
+static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
+{
+ int cpu;
+
+ if (idx >= map->nr)
+ return -1;
+
+ cpu = map->map[idx];
+
+ if (cpu >= env->nr_cpus_online)
+ return -1;
+
+ return cpu;
+}
+
+static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
+{
+ struct perf_env *env = data;
+ int cpu = perf_env__get_cpu(env, map, idx);
+
+ return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
+}
+
+static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
+{
+ struct perf_env *env = data;
+ int core = -1, cpu = perf_env__get_cpu(env, map, idx);
+
+ if (cpu != -1) {
+ int socket_id = env->cpu[cpu].socket_id;
+
+ /*
+ * Encode the socket in the upper 16 bits:
+ * core_id is only unique within its socket and
+ * we need a global id, so we combine
+ * socket and core id.
+ */
+ core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
+ }
+
+ return core;
+}
+
+static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
+ struct cpu_map **sockp)
+{
+ return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
+}
+
+static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
+ struct cpu_map **corep)
+{
+ return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
+}
+
+static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
+{
+ return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
+}
+
+static int perf_stat__get_core_file(struct cpu_map *map, int idx)
+{
+ return perf_env__get_core(map, idx, &perf_stat.session->header.env);
+}
+
+static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
+{
+ struct perf_env *env = &st->session->header.env;
+
+ switch (stat_config.aggr_mode) {
+ case AGGR_SOCKET:
+ if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
+ perror("cannot build socket map");
+ return -1;
+ }
+ aggr_get_id = perf_stat__get_socket_file;
+ break;
+ case AGGR_CORE:
+ if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
+ perror("cannot build core map");
+ return -1;
+ }
+ aggr_get_id = perf_stat__get_core_file;
+ break;
+ case AGGR_NONE:
+ case AGGR_GLOBAL:
+ case AGGR_THREAD:
+ case AGGR_UNSET:
+ default:
+ break;
+ }
+
+ return 0;
+}
+
/*
* Add default attributes, if there were no attributes specified or
* if -d/--detailed, -d -d or -d -d -d is used:
@@ -1236,6 +1588,225 @@ static int add_default_attributes(void)
return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}
+static const char * const record_usage[] = {
+ "perf stat record [<options>]",
+ NULL,
+};
+
+static void init_features(struct perf_session *session)
+{
+ int feat;
+
+ for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
+ perf_header__set_feat(&session->header, feat);
+
+ perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
+ perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
+ perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
+ perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
+}
+
+static int __cmd_record(int argc, const char **argv)
+{
+ struct perf_session *session;
+ struct perf_data_file *file = &perf_stat.file;
+
+ argc = parse_options(argc, argv, stat_options, record_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ if (output_name)
+ file->path = output_name;
+
+ if (run_count != 1 || forever) {
+ pr_err("Cannot use -r option with perf stat record.\n");
+ return -1;
+ }
+
+ session = perf_session__new(file, false, NULL);
+ if (session == NULL) {
+ pr_err("Perf session creation failed.\n");
+ return -1;
+ }
+
+ init_features(session);
+
+ session->evlist = evsel_list;
+ perf_stat.session = session;
+ perf_stat.record = true;
+ return argc;
+}
+
+static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ struct stat_round_event *round = &event->stat_round;
+ struct perf_evsel *counter;
+ struct timespec tsh, *ts = NULL;
+ const char **argv = session->header.env.cmdline_argv;
+ int argc = session->header.env.nr_cmdline;
+
+ evlist__for_each(evsel_list, counter)
+ perf_stat_process_counter(&stat_config, counter);
+
+ if (round->type == PERF_STAT_ROUND_TYPE__FINAL)
+ update_stats(&walltime_nsecs_stats, round->time);
+
+ if (stat_config.interval && round->time) {
+ tsh.tv_sec = round->time / NSECS_PER_SEC;
+ tsh.tv_nsec = round->time % NSECS_PER_SEC;
+ ts = &tsh;
+ }
+
+ print_counters(ts, argc, argv);
+ return 0;
+}
+
+static
+int process_stat_config_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_stat *st = container_of(tool, struct perf_stat, tool);
+
+ perf_event__read_stat_config(&stat_config, &event->stat_config);
+
+ if (cpu_map__empty(st->cpus)) {
+ if (st->aggr_mode != AGGR_UNSET)
+ pr_warning("warning: processing task data, aggregation mode not set\n");
+ return 0;
+ }
+
+ if (st->aggr_mode != AGGR_UNSET)
+ stat_config.aggr_mode = st->aggr_mode;
+
+ if (perf_stat.file.is_pipe)
+ perf_stat_init_aggr_mode();
+ else
+ perf_stat_init_aggr_mode_file(st);
+
+ return 0;
+}
+
+static int set_maps(struct perf_stat *st)
+{
+ if (!st->cpus || !st->threads)
+ return 0;
+
+ if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
+ return -EINVAL;
+
+ perf_evlist__set_maps(evsel_list, st->cpus, st->threads);
+
+ if (perf_evlist__alloc_stats(evsel_list, true))
+ return -ENOMEM;
+
+ st->maps_allocated = true;
+ return 0;
+}
+
+static
+int process_thread_map_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_stat *st = container_of(tool, struct perf_stat, tool);
+
+ if (st->threads) {
+ pr_warning("Extra thread map event, ignoring.\n");
+ return 0;
+ }
+
+ st->threads = thread_map__new_event(&event->thread_map);
+ if (!st->threads)
+ return -ENOMEM;
+
+ return set_maps(st);
+}
+
+static
+int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_stat *st = container_of(tool, struct perf_stat, tool);
+ struct cpu_map *cpus;
+
+ if (st->cpus) {
+ pr_warning("Extra cpu map event, ignoring.\n");
+ return 0;
+ }
+
+ cpus = cpu_map__new_data(&event->cpu_map.data);
+ if (!cpus)
+ return -ENOMEM;
+
+ st->cpus = cpus;
+ return set_maps(st);
+}
+
+static const char * const report_usage[] = {
+ "perf stat report [<options>]",
+ NULL,
+};
+
+static struct perf_stat perf_stat = {
+ .tool = {
+ .attr = perf_event__process_attr,
+ .event_update = perf_event__process_event_update,
+ .thread_map = process_thread_map_event,
+ .cpu_map = process_cpu_map_event,
+ .stat_config = process_stat_config_event,
+ .stat = perf_event__process_stat_event,
+ .stat_round = process_stat_round_event,
+ },
+ .aggr_mode = AGGR_UNSET,
+};
+
+static int __cmd_report(int argc, const char **argv)
+{
+ struct perf_session *session;
+ const struct option options[] = {
+ OPT_STRING('i', "input", &input_name, "file", "input file name"),
+ OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
+ "aggregate counts per processor socket", AGGR_SOCKET),
+ OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
+ "aggregate counts per physical processor core", AGGR_CORE),
+ OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
+ "disable CPU count aggregation", AGGR_NONE),
+ OPT_END()
+ };
+ struct stat st;
+ int ret;
+
+ argc = parse_options(argc, argv, options, report_usage, 0);
+
+ if (!input_name || !strlen(input_name)) {
+ if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
+ input_name = "-";
+ else
+ input_name = "perf.data";
+ }
+
+ perf_stat.file.path = input_name;
+ perf_stat.file.mode = PERF_DATA_MODE_READ;
+
+ session = perf_session__new(&perf_stat.file, false, &perf_stat.tool);
+ if (session == NULL)
+ return -1;
+
+ perf_stat.session = session;
+ stat_config.output = stderr;
+ evsel_list = session->evlist;
+
+ ret = perf_session__process_events(session);
+ if (ret)
+ return ret;
+
+ perf_session__delete(session);
+ return 0;
+}
+
int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
{
const char * const stat_usage[] = {
@@ -1246,6 +1817,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
const char *mode;
FILE *output = stderr;
unsigned int interval;
+ const char * const stat_subcommands[] = { "record", "report" };
setlocale(LC_ALL, "");
@@ -1253,12 +1825,30 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
if (evsel_list == NULL)
return -ENOMEM;
- argc = parse_options(argc, argv, stat_options, stat_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
+ argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
+ (const char **) stat_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ if (csv_sep) {
+ csv_output = true;
+ if (!strcmp(csv_sep, "\\t"))
+ csv_sep = "\t";
+ } else
+ csv_sep = DEFAULT_SEPARATOR;
+
+ if (argc && !strncmp(argv[0], "rec", 3)) {
+ argc = __cmd_record(argc, argv);
+ if (argc < 0)
+ return -1;
+ } else if (argc && !strncmp(argv[0], "rep", 3))
+ return __cmd_report(argc, argv);
interval = stat_config.interval;
- if (output_name && strcmp(output_name, "-"))
+ /*
+ * For the record command, -o is already taken care of.
+ */
+ if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
output = NULL;
if (output_name && output_fd) {
@@ -1296,13 +1886,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
stat_config.output = output;
- if (csv_sep) {
- csv_output = true;
- if (!strcmp(csv_sep, "\\t"))
- csv_sep = "\t";
- } else
- csv_sep = DEFAULT_SEPARATOR;
-
/*
* let the spreadsheet do the pretty-printing
*/
@@ -1425,6 +2008,42 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
if (!forever && status != -1 && !interval)
print_counters(NULL, argc, argv);
+ if (STAT_RECORD) {
+ /*
+ * We synthesize the kernel mmap record just so that older tools
+ * don't emit warnings about not being able to resolve symbols
+ * due to /proc/sys/kernel/kptr_restrict settings and instead provide
+ * a saner message about no samples being in the perf.data file.
+ *
+ * This also serves to suppress a warning about f_header.data.size == 0
+ * in header.c at the moment 'perf stat record' gets introduced, which
+ * is not really needed once we start adding the stat specific PERF_RECORD_
+ * records, but the need to suppress the kptr_restrict messages in older
+ * tools remains. -acme
+ */
+ int fd = perf_data_file__fd(&perf_stat.file);
+ int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
+ process_synthesized_event,
+ &perf_stat.session->machines.host);
+ if (err) {
+ pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
+ "older tools may produce warnings about this file\n.");
+ }
+
+ if (!interval) {
+ if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
+ pr_err("failed to write stat round event\n");
+ }
+
+ if (!perf_stat.file.is_pipe) {
+ perf_stat.session->header.data_size += perf_stat.bytes_written;
+ perf_session__write_header(perf_stat.session, evsel_list, fd, true);
+ }
+
+ perf_session__delete(perf_stat.session);
+ }
+
+ perf_stat__exit_aggr_mode();
perf_evlist__free_stats(evsel_list);
out:
perf_evlist__delete(evsel_list);
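
The AGGR_CORE path packs the socket id into the upper 16 bits because core ids are only unique within a socket, so a global id is needed for aggregation. A self-contained demonstration of the encoding and its inverse:

    #include <assert.h>
    #include <stdio.h>

    /* Same packing as perf_env__get_core() above: socket in the upper
     * 16 bits, per-socket core id in the lower 16. */
    static int encode_core(int socket_id, int core_id)
    {
        return (socket_id << 16) | (core_id & 0xffff);
    }

    int main(void)
    {
        int id = encode_core(1, 3);

        assert(id == 0x10003);      /* socket 1, core 3 */
        printf("socket %d, core %d\n", id >> 16, id & 0xffff);
        return 0;
    }
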
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 30e59620179d..bd7a7757176f 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -30,7 +30,7 @@
#include "perf.h"
#include "util/header.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/event.h"
#include "util/session.h"
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 7e2e72e6d9d1..bf01cbb0ef23 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -34,7 +34,7 @@
#include "util/top.h"
#include "util/util.h"
#include <linux/rbtree.h>
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/cpumap.h"
#include "util/xyarray.h"
@@ -175,42 +175,40 @@ static void perf_top__record_precise_ip(struct perf_top *top,
int counter, u64 ip)
{
struct annotation *notes;
- struct symbol *sym;
+ struct symbol *sym = he->ms.sym;
int err = 0;
- if (he == NULL || he->ms.sym == NULL ||
- ((top->sym_filter_entry == NULL ||
- top->sym_filter_entry->ms.sym != he->ms.sym) && use_browser != 1))
+ if (sym == NULL || (use_browser == 0 &&
+ (top->sym_filter_entry == NULL ||
+ top->sym_filter_entry->ms.sym != sym)))
return;
- sym = he->ms.sym;
notes = symbol__annotation(sym);
if (pthread_mutex_trylock(&notes->lock))
return;
- ip = he->ms.map->map_ip(he->ms.map, ip);
-
- if (ui__has_annotation())
- err = hist_entry__inc_addr_samples(he, counter, ip);
+ err = hist_entry__inc_addr_samples(he, counter, ip);
pthread_mutex_unlock(&notes->lock);
- /*
- * This function is now called with he->hists->lock held.
- * Release it before going to sleep.
- */
- pthread_mutex_unlock(&he->hists->lock);
+ if (unlikely(err)) {
+ /*
+ * This function is now called with he->hists->lock held.
+ * Release it before going to sleep.
+ */
+ pthread_mutex_unlock(&he->hists->lock);
+
+ if (err == -ERANGE && !he->ms.map->erange_warned)
+ ui__warn_map_erange(he->ms.map, sym, ip);
+ else if (err == -ENOMEM) {
+ pr_err("Not enough memory for annotating '%s' symbol!\n",
+ sym->name);
+ sleep(1);
+ }
- if (err == -ERANGE && !he->ms.map->erange_warned)
- ui__warn_map_erange(he->ms.map, sym, ip);
- else if (err == -ENOMEM) {
- pr_err("Not enough memory for annotating '%s' symbol!\n",
- sym->name);
- sleep(1);
+ pthread_mutex_lock(&he->hists->lock);
}
-
- pthread_mutex_lock(&he->hists->lock);
}
static void perf_top__show_details(struct perf_top *top)
@@ -687,14 +685,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
struct hist_entry *he = iter->he;
struct perf_evsel *evsel = iter->evsel;
- if (sort__has_sym && single) {
- u64 ip = al->addr;
-
- if (al->map)
- ip = al->map->unmap_ip(al->map, ip);
-
- perf_top__record_precise_ip(top, he, evsel->idx, ip);
- }
+ if (sort__has_sym && single)
+ perf_top__record_precise_ip(top, he, evsel->idx, al->addr);
hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
!(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
@@ -964,7 +956,7 @@ static int __cmd_top(struct perf_top *top)
if (ret)
goto out_delete;
- if (perf_session__register_idle_thread(top->session) == NULL)
+ if (perf_session__register_idle_thread(top->session) < 0)
goto out_delete;
machine__synthesize_threads(&top->session->machines.host, &opts->target,
@@ -1218,6 +1210,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
"branch filter mask", "branch stack filter modes",
parse_branch_stack),
+ OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
+ "Show raw trace event output (do not use print fmt or plugins)"),
OPT_END()
};
const char * const top_usage[] = {
@@ -1239,11 +1233,17 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
if (argc)
usage_with_options(top_usage, options);
+ if (!top.evlist->nr_entries &&
+ perf_evlist__add_default(top.evlist) < 0) {
+ pr_err("Not enough memory for event selector list\n");
+ goto out_delete_evlist;
+ }
+
sort__mode = SORT_MODE__TOP;
/* display thread wants entries to be collapsed in a different tree */
sort__need_collapse = 1;
- if (setup_sorting() < 0) {
+ if (setup_sorting(top.evlist) < 0) {
if (sort_order)
parse_options_usage(top_usage, options, "s", 1);
if (field_order)
@@ -1279,12 +1279,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
if (target__none(target))
target->system_wide = true;
- if (perf_evlist__create_maps(top.evlist, target) < 0)
- usage_with_options(top_usage, options);
-
- if (!top.evlist->nr_entries &&
- perf_evlist__add_default(top.evlist) < 0) {
- ui__error("Not enough memory for event selector list\n");
+ if (perf_evlist__create_maps(top.evlist, target) < 0) {
+ ui__error("Couldn't create thread/CPU maps: %s\n",
+ errno == ENOENT ? "No such process" : strerror_r(errno, errbuf, sizeof(errbuf)));
goto out_delete_evlist;
}
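
perf_top__record_precise_ip() now drops he->hists->lock only on the error path, since warning or sleeping while holding it would stall the processing thread; the lock is retaken before returning so the caller's unlock stays balanced. The discipline in miniature, using plain pthreads instead of perf types:

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t hists_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Caller holds hists_lock, mirroring hist_iter__top_callback(). */
    static void record_ip(int err)
    {
        if (err) {
            /* Slow path: release before warning and sleeping ... */
            pthread_mutex_unlock(&hists_lock);

            fprintf(stderr, "annotation failed: %d\n", err);
            sleep(1);

            /* ... and reacquire so the caller can unlock as usual. */
            pthread_mutex_lock(&hists_lock);
        }
    }
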
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index c783d8fd3a80..20916dd77aac 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -22,11 +22,11 @@
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
-#include "util/exec_cmd.h"
+#include <subcmd/exec-cmd.h>
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c
new file mode 100644
index 000000000000..9b10cda6b6dc
--- /dev/null
+++ b/tools/perf/builtin-version.c
@@ -0,0 +1,10 @@
+#include "util/util.h"
+#include "builtin.h"
+#include "perf.h"
+
+int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused,
+ const char *prefix __maybe_unused)
+{
+ printf("perf version %s\n", perf_version_string);
+ return 0;
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 3688ad29085f..3f871b54e261 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -17,6 +17,7 @@ extern int cmd_annotate(int argc, const char **argv, const char *prefix);
extern int cmd_bench(int argc, const char **argv, const char *prefix);
extern int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
extern int cmd_buildid_list(int argc, const char **argv, const char *prefix);
+extern int cmd_config(int argc, const char **argv, const char *prefix);
extern int cmd_diff(int argc, const char **argv, const char *prefix);
extern int cmd_evlist(int argc, const char **argv, const char *prefix);
extern int cmd_help(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 00fcaf8a5b8d..ab5cbaa170d0 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -9,6 +9,7 @@ perf-buildid-cache mainporcelain common
perf-buildid-list mainporcelain common
perf-data mainporcelain common
perf-diff mainporcelain common
+perf-config mainporcelain common
perf-evlist mainporcelain common
perf-inject mainporcelain common
perf-kmem mainporcelain common
@@ -25,4 +26,4 @@ perf-stat mainporcelain common
perf-test mainporcelain common
perf-timechart mainporcelain common
perf-top mainporcelain common
-perf-trace mainporcelain common
+perf-trace mainporcelain audit
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index de89ec574361..254d06e39bea 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -135,8 +135,6 @@ endif
ifeq ($(DEBUG),0)
CFLAGS += -O6
-else
- CFLAGS += $(call cc-option,-Og,-O0)
endif
ifdef PARSER_DEBUG
@@ -318,6 +316,18 @@ ifndef NO_LIBELF
CFLAGS += -DHAVE_LIBBPF_SUPPORT
$(call detected,CONFIG_LIBBPF)
endif
+
+ ifndef NO_DWARF
+ ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
+ CFLAGS += -DHAVE_BPF_PROLOGUE
+ $(call detected,CONFIG_BPF_PROLOGUE)
+ else
+ msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset());
+ endif
+ else
+ msg := $(warning DWARF support is off, BPF prologue is disabled);
+ endif
+
endif # NO_LIBBPF
endif # NO_LIBELF
@@ -681,6 +691,7 @@ sharedir = $(prefix)/share
template_dir = share/perf-core/templates
STRACE_GROUPS_DIR = share/perf-core/strace/groups
htmldir = share/doc/perf-doc
+tipdir = share/doc/perf-tip
ifeq ($(prefix),/usr)
sysconfdir = /etc
ETC_PERFCONFIG = $(sysconfdir)/perfconfig
@@ -707,6 +718,7 @@ infodir_SQ = $(subst ','\'',$(infodir))
perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
template_dir_SQ = $(subst ','\'',$(template_dir))
htmldir_SQ = $(subst ','\'',$(htmldir))
+tipdir_SQ = $(subst ','\'',$(tipdir))
prefix_SQ = $(subst ','\'',$(prefix))
sysconfdir_SQ = $(subst ','\'',$(sysconfdir))
libdir_SQ = $(subst ','\'',$(libdir))
@@ -714,12 +726,15 @@ libdir_SQ = $(subst ','\'',$(libdir))
ifneq ($(filter /%,$(firstword $(perfexecdir))),)
perfexec_instdir = $(perfexecdir)
STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR)
+tip_instdir = $(tipdir)
else
perfexec_instdir = $(prefix)/$(perfexecdir)
STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR)
+tip_instdir = $(prefix)/$(tipdir)
endif
perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR))
+tip_instdir_SQ = $(subst ','\'',$(tip_instdir))
# If we install to $(HOME) we keep the traceevent default:
# $(HOME)/.traceevent/plugins
@@ -741,6 +756,10 @@ ifeq ($(VF),1)
$(call print_var,sysconfdir)
$(call print_var,LIBUNWIND_DIR)
$(call print_var,LIBDW_DIR)
+
+ ifeq ($(dwarf-post-unwind),1)
+ $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text))
+ endif
$(info )
endif
@@ -756,6 +775,7 @@ $(call detected_var,ETC_PERFCONFIG_SQ)
$(call detected_var,STRACE_GROUPS_DIR_SQ)
$(call detected_var,prefix_SQ)
$(call detected_var,perfexecdir_SQ)
+$(call detected_var,tipdir_SQ)
$(call detected_var,LIBDIR)
$(call detected_var,GTK_CFLAGS)
$(call detected_var,PERL_EMBED_CCOPTS)
diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak
index 0ebef09c0842..c16ce833079c 100644
--- a/tools/perf/config/utilities.mak
+++ b/tools/perf/config/utilities.mak
@@ -177,22 +177,3 @@ $(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
endef
_ge_attempt = $(if $(get-executable),$(get-executable),$(call _gea_err,$(2)))
_gea_err = $(if $(1),$(error Please set '$(1)' appropriately))
-
-# try-run
-# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
-# Exit code chooses option. "$$TMP" can be used as a temporary file and
-# is automatically cleaned up.
-try-run = $(shell set -e; \
- TMP="$(TMPOUT).$$$$.tmp"; \
- TMPO="$(TMPOUT).$$$$.o"; \
- if ($(1)) >/dev/null 2>&1; \
- then echo "$(2)"; \
- else echo "$(3)"; \
- fi; \
- rm -f "$$TMP" "$$TMPO")
-
-# cc-option
-# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
-
-cc-option = $(call try-run,\
- $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
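
The try-run/cc-option helpers go out together with the -Og fallback removed above: cc-option compiled an empty translation unit with the candidate flag and echoed the first argument on success, so $(call cc-option,-Og,-O0) evaluated to -Og on compilers that support it and to -O0 otherwise. With that sole user gone, the helpers would be dead code, hence the deletion.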
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 3d4c7c09adea..a929618b8eb6 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -9,16 +9,18 @@
#include "builtin.h"
#include "util/env.h"
-#include "util/exec_cmd.h"
+#include <subcmd/exec-cmd.h>
#include "util/cache.h"
#include "util/quote.h"
-#include "util/run-command.h"
+#include <subcmd/run-command.h>
#include "util/parse-events.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/bpf-loader.h"
#include "util/debug.h"
#include <api/fs/tracing_path.h>
#include <pthread.h>
+#include <stdlib.h>
+#include <time.h>
const char perf_usage_string[] =
"perf [--version] [--help] [OPTIONS] COMMAND [ARGS]";
@@ -39,6 +41,7 @@ struct cmd_struct {
static struct cmd_struct commands[] = {
{ "buildid-cache", cmd_buildid_cache, 0 },
{ "buildid-list", cmd_buildid_list, 0 },
+ { "config", cmd_config, 0 },
{ "diff", cmd_diff, 0 },
{ "evlist", cmd_evlist, 0 },
{ "help", cmd_help, 0 },
@@ -118,7 +121,7 @@ static void commit_pager_choice(void)
{
switch (use_pager) {
case 0:
- setenv("PERF_PAGER", "cat", 1);
+ setenv(PERF_PAGER_ENVIRONMENT, "cat", 1);
break;
case 1:
/* setup_pager(); */
@@ -182,9 +185,9 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
if (!prefixcmp(cmd, CMD_EXEC_PATH)) {
cmd += strlen(CMD_EXEC_PATH);
if (*cmd == '=')
- perf_set_argv_exec_path(cmd + 1);
+ set_argv_exec_path(cmd + 1);
else {
- puts(perf_exec_path());
+ puts(get_argv_exec_path());
exit(0);
}
} else if (!strcmp(cmd, "--html-path")) {
@@ -383,6 +386,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
use_pager = 1;
commit_pager_choice();
+ perf_env__set_cmdline(&perf_env, argc, argv);
status = p->fn(argc, argv, prefix);
exit_browser(status);
perf_env__exit(&perf_env);
@@ -528,14 +532,20 @@ int main(int argc, const char **argv)
const char *cmd;
char sbuf[STRERR_BUFSIZE];
+ /* libsubcmd init */
+ exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
+ pager_init(PERF_PAGER_ENVIRONMENT);
+
/* The page_size is placed in util object. */
page_size = sysconf(_SC_PAGE_SIZE);
cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
- cmd = perf_extract_argv0_path(argv[0]);
+ cmd = extract_argv0_path(argv[0]);
if (!cmd)
cmd = "perf-help";
+ srandom(time(NULL));
+
/* get debugfs/tracefs mount point from /proc/mounts */
tracing_path_mount();
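
The srandom(time(NULL)) seed and the new tipdir install path (see config/Makefile above) presumably belong to the same feature: seeding the RNG once at startup so a random tip can be picked from the perf-tip directory on each run. That connection is an assumption based on the two changes landing together; it is not stated in this hunk.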
diff --git a/tools/perf/scripts/python/stat-cpi.py b/tools/perf/scripts/python/stat-cpi.py
new file mode 100644
index 000000000000..8b60f343dd07
--- /dev/null
+++ b/tools/perf/scripts/python/stat-cpi.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+
+data = {}
+times = []
+threads = []
+cpus = []
+
+def get_key(time, event, cpu, thread):
+ return "%d-%s-%d-%d" % (time, event, cpu, thread)
+
+def store_key(time, cpu, thread):
+ if (time not in times):
+ times.append(time)
+
+ if (cpu not in cpus):
+ cpus.append(cpu)
+
+ if (thread not in threads):
+ threads.append(thread)
+
+def store(time, event, cpu, thread, val, ena, run):
+ #print "event %s cpu %d, thread %d, time %d, val %d, ena %d, run %d" % \
+ # (event, cpu, thread, time, val, ena, run)
+
+ store_key(time, cpu, thread)
+ key = get_key(time, event, cpu, thread)
+ data[key] = [ val, ena, run]
+
+def get(time, event, cpu, thread):
+ key = get_key(time, event, cpu, thread)
+ return data[key][0]
+
+def stat__cycles_k(cpu, thread, time, val, ena, run):
+ store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions_k(cpu, thread, time, val, ena, run):
+ store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__cycles_u(cpu, thread, time, val, ena, run):
+ store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions_u(cpu, thread, time, val, ena, run):
+ store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__cycles(cpu, thread, time, val, ena, run):
+ store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions(cpu, thread, time, val, ena, run):
+ store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__interval(time):
+ for cpu in cpus:
+ for thread in threads:
+ cyc = get(time, "cycles", cpu, thread)
+ ins = get(time, "instructions", cpu, thread)
+ cpi = 0
+
+ if ins != 0:
+ cpi = cyc/float(ins)
+
+ print "%15f: cpu %d, thread %d -> cpi %f (%d/%d)" % (time/(float(1000000000)), cpu, thread, cpi, cyc, ins)
+
+def trace_end():
+ pass
+# XXX trace_end callback could be used as an alternative place
+# to compute same values as in the script above:
+#
+# for time in times:
+# for cpu in cpus:
+# for thread in threads:
+# cyc = get(time, "cycles", cpu, thread)
+# ins = get(time, "instructions", cpu, thread)
+#
+# if ins != 0:
+# cpi = cyc/float(ins)
+#
+# print "time %.9f, cpu %d, thread %d -> cpi %f" % (time/(float(1000000000)), cpu, thread, cpi)
diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore
index 489fc9ffbcb0..bf016c439fbd 100644
--- a/tools/perf/tests/.gitignore
+++ b/tools/perf/tests/.gitignore
@@ -1,2 +1,3 @@
llvm-src-base.c
llvm-src-kbuild.c
+llvm-src-prologue.c
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index f41ebf8849fe..614899b88b37 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -31,24 +31,34 @@ perf-y += sample-parsing.o
perf-y += parse-no-sample-id-all.o
perf-y += kmod-path.o
perf-y += thread-map.o
-perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o
+perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o
perf-y += bpf.o
perf-y += topology.o
+perf-y += cpumap.o
+perf-y += stat.o
+perf-y += event_update.o
-$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c
+$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)
$(Q)echo '#include <tests/llvm.h>' > $@
$(Q)echo 'const char test_llvm__bpf_base_prog[] =' >> $@
$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
$(Q)echo ';' >> $@
-$(OUTPUT)tests/llvm-src-kbuild.c: tests/bpf-script-test-kbuild.c
+$(OUTPUT)tests/llvm-src-kbuild.c: tests/bpf-script-test-kbuild.c tests/Build
$(call rule_mkdir)
$(Q)echo '#include <tests/llvm.h>' > $@
$(Q)echo 'const char test_llvm__bpf_test_kbuild_prog[] =' >> $@
$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
$(Q)echo ';' >> $@
+$(OUTPUT)tests/llvm-src-prologue.c: tests/bpf-script-test-prologue.c tests/Build
+ $(call rule_mkdir)
+ $(Q)echo '#include <tests/llvm.h>' > $@
+ $(Q)echo 'const char test_llvm__bpf_test_prologue_prog[] =' >> $@
+ $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
+ $(Q)echo ';' >> $@
+
ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
endif
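
The generation rules above embed each BPF test source into the binary as a C string: the first sed expression escapes double quotes, the second wraps every source line in "...\n". Assuming the prologue source added later in this diff as input, the generated llvm-src-prologue.c would look roughly like:

    #include <tests/llvm.h>
    const char test_llvm__bpf_test_prologue_prog[] =
    "/*\n"
    " * bpf-script-test-prologue.c\n"
    " * Test BPF prologue\n"
    " */\n"
    /* ... one quoted, \n-terminated string per remaining source line ... */
    ;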
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 638875a0960a..28d1605b0338 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -24,7 +24,7 @@
#include <linux/kernel.h>
#include "../perf.h"
#include "util.h"
-#include "exec_cmd.h"
+#include <subcmd/exec-cmd.h>
#include "tests.h"
#define ENV "PERF_TEST_ATTR"
@@ -153,7 +153,7 @@ static int run_dir(const char *d, const char *perf)
return system(cmd);
}
-int test__attr(void)
+int test__attr(int subtest __maybe_unused)
{
struct stat st;
char path_perf[PATH_MAX];
@@ -164,7 +164,7 @@ int test__attr(void)
return run_dir("./tests", "./perf");
/* Then installed path. */
- snprintf(path_dir, PATH_MAX, "%s/tests", perf_exec_path());
+ snprintf(path_dir, PATH_MAX, "%s/tests", get_argv_exec_path());
snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR);
if (!lstat(path_dir, &st) &&
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index a02b035fd5aa..fb80c9eb6a95 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -111,7 +111,7 @@ static long long bp_count(int fd)
return count;
}
-int test__bp_signal(void)
+int test__bp_signal(int subtest __maybe_unused)
{
struct sigaction sa;
long long count1, count2;
diff --git a/tools/perf/tests/bp_signal_overflow.c b/tools/perf/tests/bp_signal_overflow.c
index e76537724491..89f92fa67cc4 100644
--- a/tools/perf/tests/bp_signal_overflow.c
+++ b/tools/perf/tests/bp_signal_overflow.c
@@ -58,7 +58,7 @@ static long long bp_count(int fd)
#define EXECUTIONS 10000
#define THRESHOLD 100
-int test__bp_signal_overflow(void)
+int test__bp_signal_overflow(int subtest __maybe_unused)
{
struct perf_event_attr pe;
struct sigaction sa;
diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c
new file mode 100644
index 000000000000..7230e62c70fc
--- /dev/null
+++ b/tools/perf/tests/bpf-script-test-prologue.c
@@ -0,0 +1,35 @@
+/*
+ * bpf-script-test-prologue.c
+ * Test BPF prologue
+ */
+#ifndef LINUX_VERSION_CODE
+# error Need LINUX_VERSION_CODE
+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
+#endif
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+#include <uapi/linux/fs.h>
+
+#define FMODE_READ 0x1
+#define FMODE_WRITE 0x2
+
+static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
+ (void *) 6;
+
+SEC("func=null_lseek file->f_mode offset orig")
+int bpf_func__null_lseek(void *ctx, int err, unsigned long f_mode,
+ unsigned long offset, unsigned long orig)
+{
+ if (err)
+ return 0;
+ if (f_mode & FMODE_WRITE)
+ return 0;
+ if (offset & 1)
+ return 0;
+ if (orig == SEEK_CUR)
+ return 0;
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = LINUX_VERSION_CODE;
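
Two conventions in this file are worth noting. The SEC("func=null_lseek file->f_mode offset orig") string names the kernel function to probe and the arguments the generated prologue must fetch — including the file->f_mode member load, which is exactly the kind of dereference a plain BPF program cannot do itself, as I read the feature. And bpf_trace_printk is declared as (void *) 6 because BPF helpers are invoked by numeric ID; 6 is the kernel's ID for the trace_printk helper.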
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index ec16f7812c8b..33689a0cf821 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -19,6 +19,29 @@ static int epoll_pwait_loop(void)
return 0;
}
+#ifdef HAVE_BPF_PROLOGUE
+
+static int llseek_loop(void)
+{
+ int fds[2], i;
+
+ fds[0] = open("/dev/null", O_RDONLY);
+ fds[1] = open("/dev/null", O_RDWR);
+
+ if (fds[0] < 0 || fds[1] < 0)
+ return -1;
+
+ for (i = 0; i < NR_ITERS; i++) {
+ lseek(fds[i % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
+ lseek(fds[(i + 1) % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
+ }
+ close(fds[0]);
+ close(fds[1]);
+ return 0;
+}
+
+#endif
+
static struct {
enum test_llvm__testcase prog_id;
const char *desc;
@@ -37,6 +60,17 @@ static struct {
&epoll_pwait_loop,
(NR_ITERS + 1) / 2,
},
+#ifdef HAVE_BPF_PROLOGUE
+ {
+ LLVM_TESTCASE_BPF_PROLOGUE,
+ "Test BPF prologue generation",
+ "[bpf_prologue_test]",
+ "fix kbuild first",
+ "check your vmlinux setting?",
+ &llseek_loop,
+ (NR_ITERS + 1) / 4,
+ },
+#endif
};
static int do_test(struct bpf_object *obj, int (*func)(void),
@@ -68,8 +102,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj);
if (err || list_empty(&parse_evlist.list)) {
pr_debug("Failed to add events selected by BPF\n");
- if (!err)
- return TEST_FAIL;
+ return TEST_FAIL;
}
snprintf(pid, sizeof(pid), "%d", getpid());
@@ -123,8 +156,10 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
}
}
- if (count != expect)
+ if (count != expect) {
pr_debug("BPF filter result incorrect\n");
+ goto out_delete_evlist;
+ }
ret = TEST_OK;
@@ -146,7 +181,7 @@ prepare_bpf(void *obj_buf, size_t obj_buf_sz, const char *name)
return obj;
}
-static int __test__bpf(int index)
+static int __test__bpf(int idx)
{
int ret;
void *obj_buf;
@@ -154,54 +189,72 @@ static int __test__bpf(int index)
struct bpf_object *obj;
ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
- bpf_testcase_table[index].prog_id,
+ bpf_testcase_table[idx].prog_id,
true);
if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
pr_debug("Unable to get BPF object, %s\n",
- bpf_testcase_table[index].msg_compile_fail);
- if (index == 0)
+ bpf_testcase_table[idx].msg_compile_fail);
+ if (idx == 0)
return TEST_SKIP;
else
return TEST_FAIL;
}
obj = prepare_bpf(obj_buf, obj_buf_sz,
- bpf_testcase_table[index].name);
+ bpf_testcase_table[idx].name);
if (!obj) {
ret = TEST_FAIL;
goto out;
}
ret = do_test(obj,
- bpf_testcase_table[index].target_func,
- bpf_testcase_table[index].expect_result);
+ bpf_testcase_table[idx].target_func,
+ bpf_testcase_table[idx].expect_result);
out:
bpf__clear();
return ret;
}
-int test__bpf(void)
+int test__bpf_subtest_get_nr(void)
+{
+ return (int)ARRAY_SIZE(bpf_testcase_table);
+}
+
+const char *test__bpf_subtest_get_desc(int i)
+{
+ if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table))
+ return NULL;
+ return bpf_testcase_table[i].desc;
+}
+
+int test__bpf(int i)
{
- unsigned int i;
int err;
+ if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table))
+ return TEST_FAIL;
+
if (geteuid() != 0) {
pr_debug("Only root can run BPF test\n");
return TEST_SKIP;
}
- for (i = 0; i < ARRAY_SIZE(bpf_testcase_table); i++) {
- err = __test__bpf(i);
+ err = __test__bpf(i);
+ return err;
+}
- if (err != TEST_OK)
- return err;
- }
+#else
+int test__bpf_subtest_get_nr(void)
+{
+ return 0;
+}
- return TEST_OK;
+const char *test__bpf_subtest_get_desc(int i __maybe_unused)
+{
+ return NULL;
}
-#else
-int test__bpf(void)
+int test__bpf(int i __maybe_unused)
{
pr_debug("Skip BPF test because BPF support is not compiled\n");
return TEST_SKIP;
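
The (NR_ITERS + 1) / 4 expectation for the prologue case follows from the filter, assuming the usual SEEK_SET == 0 and SEEK_CUR == 1: each of the NR_ITERS iterations issues two lseeks, but the second always has an odd offset and is rejected by the (offset & 1) check; the first survives only when i is even (so fds[i % 2] is the read-only descriptor and the offset is even) and (i / 2) % 2 == 0 (so whence is SEEK_SET rather than SEEK_CUR) — that is, when i % 4 == 0, giving roughly NR_ITERS / 4 hits, rounded as (NR_ITERS + 1) / 4.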
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 80c442eab767..f2b1dcac45d3 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -11,7 +11,7 @@
#include "tests.h"
#include "debug.h"
#include "color.h"
-#include "parse-options.h"
+#include <subcmd/parse-options.h>
#include "symbol.h"
struct test __weak arch_tests[] = {
@@ -160,6 +160,11 @@ static struct test generic_tests[] = {
{
.desc = "Test LLVM searching and compiling",
.func = test__llvm,
+ .subtest = {
+ .skip_if_fail = true,
+ .get_nr = test__llvm_subtest_get_nr,
+ .get_desc = test__llvm_subtest_get_desc,
+ },
},
{
.desc = "Test topology in session",
@@ -168,6 +173,35 @@ static struct test generic_tests[] = {
{
.desc = "Test BPF filter",
.func = test__bpf,
+ .subtest = {
+ .skip_if_fail = true,
+ .get_nr = test__bpf_subtest_get_nr,
+ .get_desc = test__bpf_subtest_get_desc,
+ },
+ },
+ {
+ .desc = "Test thread map synthesize",
+ .func = test__thread_map_synthesize,
+ },
+ {
+ .desc = "Test cpu map synthesize",
+ .func = test__cpu_map_synthesize,
+ },
+ {
+ .desc = "Test stat config synthesize",
+ .func = test__synthesize_stat_config,
+ },
+ {
+ .desc = "Test stat synthesize",
+ .func = test__synthesize_stat,
+ },
+ {
+ .desc = "Test stat round synthesize",
+ .func = test__synthesize_stat_round,
+ },
+ {
+ .desc = "Test attr update synthesize",
+ .func = test__event_update,
},
{
.func = NULL,
@@ -203,7 +237,7 @@ static bool perf_test__matches(struct test *test, int curr, int argc, const char
return false;
}
-static int run_test(struct test *test)
+static int run_test(struct test *test, int subtest)
{
int status, err = -1, child = fork();
char sbuf[STRERR_BUFSIZE];
@@ -216,7 +250,22 @@ static int run_test(struct test *test)
if (!child) {
pr_debug("test child forked, pid %d\n", getpid());
- err = test->func();
+ if (!verbose) {
+ int nullfd = open("/dev/null", O_WRONLY);
+ if (nullfd >= 0) {
+ close(STDERR_FILENO);
+ close(STDOUT_FILENO);
+
+ dup2(nullfd, STDOUT_FILENO);
+ dup2(STDOUT_FILENO, STDERR_FILENO);
+ close(nullfd);
+ }
+ } else {
+ signal(SIGSEGV, sighandler_dump_stack);
+ signal(SIGFPE, sighandler_dump_stack);
+ }
+
+ err = test->func(subtest);
exit(err);
}
@@ -237,6 +286,40 @@ static int run_test(struct test *test)
for (j = 0; j < ARRAY_SIZE(tests); j++) \
for (t = &tests[j][0]; t->func; t++)
+static int test_and_print(struct test *t, bool force_skip, int subtest)
+{
+ int err;
+
+ if (!force_skip) {
+ pr_debug("\n--- start ---\n");
+ err = run_test(t, subtest);
+ pr_debug("---- end ----\n");
+ } else {
+ pr_debug("\n--- force skipped ---\n");
+ err = TEST_SKIP;
+ }
+
+ if (!t->subtest.get_nr)
+ pr_debug("%s:", t->desc);
+ else
+ pr_debug("%s subtest %d:", t->desc, subtest);
+
+ switch (err) {
+ case TEST_OK:
+ pr_info(" Ok\n");
+ break;
+ case TEST_SKIP:
+ color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
+ break;
+ case TEST_FAIL:
+ default:
+ color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");
+ break;
+ }
+
+ return err;
+}
+
static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
{
struct test *t;
@@ -264,21 +347,43 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
continue;
}
- pr_debug("\n--- start ---\n");
- err = run_test(t);
- pr_debug("---- end ----\n%s:", t->desc);
-
- switch (err) {
- case TEST_OK:
- pr_info(" Ok\n");
- break;
- case TEST_SKIP:
- color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
- break;
- case TEST_FAIL:
- default:
- color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");
- break;
+ if (!t->subtest.get_nr) {
+ test_and_print(t, false, -1);
+ } else {
+ int subn = t->subtest.get_nr();
+ /*
+ * Minus 2 to align with normal test cases:
+ * for subtests we print an additional '.x' in the number,
+ * for example:
+ *
+ * 35: Test LLVM searching and compiling :
+ * 35.1: Basic BPF llvm compiling test : Ok
+ */
+ int subw = width > 2 ? width - 2 : width;
+ bool skip = false;
+ int subi;
+
+ if (subn <= 0) {
+ color_fprintf(stderr, PERF_COLOR_YELLOW,
+ " Skip (not compiled in)\n");
+ continue;
+ }
+ pr_info("\n");
+
+ for (subi = 0; subi < subn; subi++) {
+ int len = strlen(t->subtest.get_desc(subi));
+
+ if (subw < len)
+ subw = len;
+ }
+
+ for (subi = 0; subi < subn; subi++) {
+ pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
+ t->subtest.get_desc(subi));
+ err = test_and_print(t, skip, subi);
+ if (err != TEST_OK && t->subtest.skip_if_fail)
+ skip = true;
+ }
}
}
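
A test opts into the new scheme by filling in the subtest callbacks; the core above then handles numbering, the per-subtest Ok/Skip/FAILED lines, and the skip_if_fail short-circuit. A minimal, hypothetical sketch — the "my feature" names are illustrative, not part of this patch:

    /* Hypothetical subtest-capable test; all names are made up. */
    static int test__my_feature_nr(void)
    {
    	return 2;
    }

    static const char *test__my_feature_desc(int subtest)
    {
    	static const char *desc[2] = { "fast path", "slow path" };
    	return (subtest >= 0 && subtest < 2) ? desc[subtest] : NULL;
    }

    static int test__my_feature(int subtest)
    {
    	/* Called once per subtest index in [0, get_nr()). */
    	return subtest == 0 ? TEST_OK : TEST_SKIP;
    }

    /* ...and its entry in a tests[] table: */
    {
    	.desc = "Test my feature",
    	.func = test__my_feature,
    	.subtest = {
    		.skip_if_fail = true,
    		.get_nr = test__my_feature_nr,
    		.get_desc = test__my_feature_desc,
    	},
    },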
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index a767a6400c5c..313a48c6b2bc 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -433,7 +433,6 @@ enum {
static int do_test_code_reading(bool try_kcore)
{
- struct machines machines;
struct machine *machine;
struct thread *thread;
struct record_opts opts = {
@@ -459,8 +458,7 @@ static int do_test_code_reading(bool try_kcore)
pid = getpid();
- machines__init(&machines);
- machine = &machines.host;
+ machine = machine__new_host();
ret = machine__create_kernel_maps(machine);
if (ret < 0) {
@@ -549,6 +547,13 @@ static int do_test_code_reading(bool try_kcore)
if (ret < 0) {
if (!excl_kernel) {
excl_kernel = true;
+ /*
+ * Both cpus and threads are now owned by evlist
+ * and will be freed by the following perf_evlist__set_maps
+ * call. Get a reference to keep them alive.
+ */
+ cpu_map__get(cpus);
+ thread_map__get(threads);
perf_evlist__set_maps(evlist, NULL, NULL);
perf_evlist__delete(evlist);
evlist = NULL;
@@ -594,14 +599,13 @@ out_err:
cpu_map__put(cpus);
thread_map__put(threads);
}
- machines__destroy_kernel_maps(&machines);
machine__delete_threads(machine);
- machines__exit(&machines);
+ machine__delete(machine);
return err;
}
-int test__code_reading(void)
+int test__code_reading(int subtest __maybe_unused)
{
int ret;
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
new file mode 100644
index 000000000000..4cb6418a8ffc
--- /dev/null
+++ b/tools/perf/tests/cpumap.c
@@ -0,0 +1,88 @@
+#include "tests.h"
+#include "cpumap.h"
+
+static int process_event_mask(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct cpu_map_event *map_event = &event->cpu_map;
+ struct cpu_map_mask *mask;
+ struct cpu_map_data *data;
+ struct cpu_map *map;
+ int i;
+
+ data = &map_event->data;
+
+ TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__MASK);
+
+ mask = (struct cpu_map_mask *)data->data;
+
+ TEST_ASSERT_VAL("wrong nr", mask->nr == 1);
+
+ for (i = 0; i < 20; i++) {
+ TEST_ASSERT_VAL("wrong cpu", test_bit(i, mask->mask));
+ }
+
+ map = cpu_map__new_data(data);
+ TEST_ASSERT_VAL("wrong nr", map->nr == 20);
+
+ for (i = 0; i < 20; i++) {
+ TEST_ASSERT_VAL("wrong cpu", map->map[i] == i);
+ }
+
+ cpu_map__put(map);
+ return 0;
+}
+
+static int process_event_cpus(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct cpu_map_event *map_event = &event->cpu_map;
+ struct cpu_map_entries *cpus;
+ struct cpu_map_data *data;
+ struct cpu_map *map;
+
+ data = &map_event->data;
+
+ TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__CPUS);
+
+ cpus = (struct cpu_map_entries *)data->data;
+
+ TEST_ASSERT_VAL("wrong nr", cpus->nr == 2);
+ TEST_ASSERT_VAL("wrong cpu", cpus->cpu[0] == 1);
+ TEST_ASSERT_VAL("wrong cpu", cpus->cpu[1] == 256);
+
+ map = cpu_map__new_data(data);
+ TEST_ASSERT_VAL("wrong nr", map->nr == 2);
+ TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1);
+ TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256);
+ TEST_ASSERT_VAL("wrong refcnt", atomic_read(&map->refcnt) == 1);
+ cpu_map__put(map);
+ return 0;
+}
+
+
+int test__cpu_map_synthesize(int subtest __maybe_unused)
+{
+ struct cpu_map *cpus;
+
+ /* This one is better stored as a mask. */
+ cpus = cpu_map__new("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19");
+
+ TEST_ASSERT_VAL("failed to synthesize map",
+ !perf_event__synthesize_cpu_map(NULL, cpus, process_event_mask, NULL));
+
+ cpu_map__put(cpus);
+
+ /* This one is better stored as cpu values. */
+ cpus = cpu_map__new("1,256");
+
+ TEST_ASSERT_VAL("failed to synthesize map",
+ !perf_event__synthesize_cpu_map(NULL, cpus, process_event_cpus, NULL));
+
+ cpu_map__put(cpus);
+ return 0;
+}
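
The two cases exercise both wire encodings of a CPU map: 0-19 is denser as a bitmask (one set bit per CPU within a single word), while {1, 256} is denser as explicit values — encoded as a mask it would need enough words to cover bit 256 just to carry two entries. cpu_map__new_data() reconstructs an ordinary struct cpu_map from either form; which encoding the synthesizer picks is, presumably, whichever is smaller, which is why the test pins one map per format.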
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index a218aeaf56a0..dc673ff7c437 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -110,7 +110,7 @@ static int dso__data_fd(struct dso *dso, struct machine *machine)
return fd;
}
-int test__dso_data(void)
+int test__dso_data(int subtest __maybe_unused)
{
struct machine machine;
struct dso *dso;
@@ -245,7 +245,7 @@ static int set_fd_limit(int n)
return setrlimit(RLIMIT_NOFILE, &rlim);
}
-int test__dso_data_cache(void)
+int test__dso_data_cache(int subtest __maybe_unused)
{
struct machine machine;
long nr_end, nr = open_files_cnt();
@@ -302,7 +302,7 @@ int test__dso_data_cache(void)
return 0;
}
-int test__dso_data_reopen(void)
+int test__dso_data_reopen(int subtest __maybe_unused)
{
struct machine machine;
long nr_end, nr = open_files_cnt();
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 07221793a3ac..1c5c0221cea2 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -51,6 +51,12 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
"krava_1",
"test__dwarf_unwind"
};
+ /*
+ * The funcs[MAX_STACK] array index, based on the
+ * callchain order setup.
+ */
+ int idx = callchain_param.order == ORDER_CALLER ?
+ MAX_STACK - *cnt - 1 : *cnt;
if (*cnt >= MAX_STACK) {
pr_debug("failed: crossed the max stack value %d\n", MAX_STACK);
@@ -63,8 +69,10 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
return -1;
}
- pr_debug("got: %s 0x%" PRIx64 "\n", symbol, entry->ip);
- return strcmp((const char *) symbol, funcs[(*cnt)++]);
+ (*cnt)++;
+ pr_debug("got: %s 0x%" PRIx64 ", expecting %s\n",
+ symbol, entry->ip, funcs[idx]);
+ return strcmp((const char *) symbol, funcs[idx]);
}
__attribute__ ((noinline))
@@ -105,8 +113,16 @@ static int compare(void *p1, void *p2)
/* Any possible value should be 'thread' */
struct thread *thread = *(struct thread **)p1;
- if (global_unwind_retval == -INT_MAX)
+ if (global_unwind_retval == -INT_MAX) {
+ /* Call unwinder twice for both callchain orders. */
+ callchain_param.order = ORDER_CALLER;
+
global_unwind_retval = unwind_thread(thread);
+ if (!global_unwind_retval) {
+ callchain_param.order = ORDER_CALLEE;
+ global_unwind_retval = unwind_thread(thread);
+ }
+ }
return p1 - p2;
}
@@ -142,21 +158,23 @@ static int krava_1(struct thread *thread)
return krava_2(thread);
}
-int test__dwarf_unwind(void)
+int test__dwarf_unwind(int subtest __maybe_unused)
{
- struct machines machines;
struct machine *machine;
struct thread *thread;
int err = -1;
- machines__init(&machines);
-
- machine = machines__find(&machines, HOST_KERNEL_ID);
+ machine = machine__new_host();
if (!machine) {
pr_err("Could not get machine\n");
return -1;
}
+ if (machine__create_kernel_maps(machine)) {
+ pr_err("Failed to create kernel maps\n");
+ return -1;
+ }
+
callchain_param.record_mode = CALLCHAIN_DWARF;
if (init_live_machine(machine)) {
@@ -178,7 +196,6 @@ int test__dwarf_unwind(void)
out:
machine__delete_threads(machine);
- machine__exit(machine);
- machines__exit(&machines);
+ machine__delete(machine);
return err;
}
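
Running the unwinder in both orders checks that entries arrive mirrored in ORDER_CALLER mode. With funcs[] listed innermost frame first, callee order expects funcs[0], funcs[1], ... in callback order, while caller order must see the same symbols outermost-first, hence idx = MAX_STACK - *cnt - 1: if MAX_STACK were 3, the third caller-order callback (*cnt == 2) is checked against funcs[0].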
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
new file mode 100644
index 000000000000..012eab5d1df1
--- /dev/null
+++ b/tools/perf/tests/event_update.c
@@ -0,0 +1,117 @@
+#include <linux/compiler.h>
+#include "evlist.h"
+#include "evsel.h"
+#include "machine.h"
+#include "tests.h"
+#include "debug.h"
+
+static int process_event_unit(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct event_update_event *ev = (struct event_update_event *) event;
+
+ TEST_ASSERT_VAL("wrong id", ev->id == 123);
+ TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__UNIT);
+ TEST_ASSERT_VAL("wrong unit", !strcmp(ev->data, "KRAVA"));
+ return 0;
+}
+
+static int process_event_scale(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct event_update_event *ev = (struct event_update_event *) event;
+ struct event_update_event_scale *ev_data;
+
+ ev_data = (struct event_update_event_scale *) ev->data;
+
+ TEST_ASSERT_VAL("wrong id", ev->id == 123);
+ TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__SCALE);
+ TEST_ASSERT_VAL("wrong scale", ev_data->scale = 0.123);
+ return 0;
+}
+
+struct event_name {
+ struct perf_tool tool;
+ const char *name;
+};
+
+static int process_event_name(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct event_name *tmp = container_of(tool, struct event_name, tool);
+ struct event_update_event *ev = (struct event_update_event*) event;
+
+ TEST_ASSERT_VAL("wrong id", ev->id == 123);
+ TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__NAME);
+ TEST_ASSERT_VAL("wrong name", !strcmp(ev->data, tmp->name));
+ return 0;
+}
+
+static int process_event_cpus(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct event_update_event *ev = (struct event_update_event*) event;
+ struct event_update_event_cpus *ev_data;
+ struct cpu_map *map;
+
+ ev_data = (struct event_update_event_cpus*) ev->data;
+
+ map = cpu_map__new_data(&ev_data->cpus);
+
+ TEST_ASSERT_VAL("wrong id", ev->id == 123);
+ TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS);
+ TEST_ASSERT_VAL("wrong cpus", map->nr == 3);
+ TEST_ASSERT_VAL("wrong cpus", map->map[0] == 1);
+ TEST_ASSERT_VAL("wrong cpus", map->map[1] == 2);
+ TEST_ASSERT_VAL("wrong cpus", map->map[2] == 3);
+ cpu_map__put(map);
+ return 0;
+}
+
+int test__event_update(int subtest __maybe_unused)
+{
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel;
+ struct event_name tmp;
+
+ evlist = perf_evlist__new_default();
+ TEST_ASSERT_VAL("failed to get evlist", evlist);
+
+ evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("failed to allos ids",
+ !perf_evsel__alloc_id(evsel, 1, 1));
+
+ perf_evlist__id_add(evlist, evsel, 0, 0, 123);
+
+ evsel->unit = strdup("KRAVA");
+
+ TEST_ASSERT_VAL("failed to synthesize attr update unit",
+ !perf_event__synthesize_event_update_unit(NULL, evsel, process_event_unit));
+
+ evsel->scale = 0.123;
+
+ TEST_ASSERT_VAL("failed to synthesize attr update scale",
+ !perf_event__synthesize_event_update_scale(NULL, evsel, process_event_scale));
+
+ tmp.name = perf_evsel__name(evsel);
+
+ TEST_ASSERT_VAL("failed to synthesize attr update name",
+ !perf_event__synthesize_event_update_name(&tmp.tool, evsel, process_event_name));
+
+ evsel->own_cpus = cpu_map__new("1,2,3");
+
+ TEST_ASSERT_VAL("failed to synthesize attr update cpus",
+ !perf_event__synthesize_event_update_cpus(&tmp.tool, evsel, process_event_cpus));
+
+ cpu_map__put(evsel->own_cpus);
+ return 0;
+}
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index 3fa715987a5e..2de4a4f2c3ed 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -95,7 +95,7 @@ out_delete_evlist:
#define perf_evsel__name_array_test(names) \
__perf_evsel__name_array_test(names, ARRAY_SIZE(names))
-int test__perf_evsel__roundtrip_name_test(void)
+int test__perf_evsel__roundtrip_name_test(int subtest __maybe_unused)
{
int err = 0, ret = 0;
@@ -103,7 +103,8 @@ int test__perf_evsel__roundtrip_name_test(void)
if (err)
ret = err;
- err = perf_evsel__name_array_test(perf_evsel__sw_names);
+ err = __perf_evsel__name_array_test(perf_evsel__sw_names,
+ PERF_COUNT_SW_DUMMY + 1);
if (err)
ret = err;
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index 790e413d9a1f..1984b3bbfe15 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -32,7 +32,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
return ret;
}
-int test__perf_evsel__tp_sched_test(void)
+int test__perf_evsel__tp_sched_test(int subtest __maybe_unused)
{
struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch");
int ret = 0;
diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c
index d24b837951d4..c809463edbe5 100644
--- a/tools/perf/tests/fdarray.c
+++ b/tools/perf/tests/fdarray.c
@@ -25,7 +25,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE
return printed + fdarray__fprintf(fda, fp);
}
-int test__fdarray__filter(void)
+int test__fdarray__filter(int subtest __maybe_unused)
{
int nr_fds, expected_fd[2], fd, err = TEST_FAIL;
struct fdarray *fda = fdarray__new(5, 5);
@@ -103,7 +103,7 @@ out:
return err;
}
-int test__fdarray__add(void)
+int test__fdarray__add(int subtest __maybe_unused)
{
int err = TEST_FAIL;
struct fdarray *fda = fdarray__new(2, 2);
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index ce80b274b097..bcfd081ee1d2 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -87,6 +87,11 @@ struct machine *setup_fake_machine(struct machines *machines)
return NULL;
}
+ if (machine__create_kernel_maps(machine)) {
+ pr_debug("Cannot create kernel maps\n");
+ return NULL;
+ }
+
for (i = 0; i < ARRAY_SIZE(fake_threads); i++) {
struct thread *thread;
@@ -150,7 +155,6 @@ struct machine *setup_fake_machine(struct machines *machines)
out:
pr_debug("Not enough memory for machine setup\n");
machine__delete_threads(machine);
- machine__delete(machine);
return NULL;
}
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 7ed737019de7..e36089212061 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -281,7 +281,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine)
symbol_conf.cumulate_callchain = false;
perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
- setup_sorting();
+ setup_sorting(NULL);
callchain_register_param(&callchain_param);
err = add_hist_entries(hists, machine);
@@ -428,7 +428,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine)
symbol_conf.cumulate_callchain = false;
perf_evsel__set_sample_bit(evsel, CALLCHAIN);
- setup_sorting();
+ setup_sorting(NULL);
callchain_register_param(&callchain_param);
err = add_hist_entries(hists, machine);
@@ -486,7 +486,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine)
symbol_conf.cumulate_callchain = true;
perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
- setup_sorting();
+ setup_sorting(NULL);
callchain_register_param(&callchain_param);
err = add_hist_entries(hists, machine);
@@ -670,7 +670,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine)
symbol_conf.cumulate_callchain = true;
perf_evsel__set_sample_bit(evsel, CALLCHAIN);
- setup_sorting();
+ setup_sorting(NULL);
callchain_register_param(&callchain_param);
err = add_hist_entries(hists, machine);
@@ -686,7 +686,7 @@ out:
return err;
}
-int test__hists_cumulate(void)
+int test__hists_cumulate(int subtest __maybe_unused)
{
int err = TEST_FAIL;
struct machines machines;
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 818acf875dd0..2a784befd9ce 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -104,7 +104,7 @@ out:
return TEST_FAIL;
}
-int test__hists_filter(void)
+int test__hists_filter(int subtest __maybe_unused)
{
int err = TEST_FAIL;
struct machines machines;
@@ -122,7 +122,7 @@ int test__hists_filter(void)
goto out;
/* default sort order (comm,dso,sym) will be used */
- if (setup_sorting() < 0)
+ if (setup_sorting(NULL) < 0)
goto out;
machines__init(&machines);
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 8c102b011424..c764d69ac6ef 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -64,7 +64,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
struct perf_evsel *evsel;
struct addr_location al;
struct hist_entry *he;
- struct perf_sample sample = { .period = 1, };
+ struct perf_sample sample = { .period = 1, .weight = 1, };
size_t i = 0, k;
/*
@@ -90,7 +90,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
goto out;
he = __hists__add_entry(hists, &al, NULL,
- NULL, NULL, 1, 1, 0, true);
+ NULL, NULL, &sample, true);
if (he == NULL) {
addr_location__put(&al);
goto out;
@@ -116,7 +116,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
goto out;
he = __hists__add_entry(hists, &al, NULL,
- NULL, NULL, 1, 1, 0, true);
+ NULL, NULL, &sample, true);
if (he == NULL) {
addr_location__put(&al);
goto out;
@@ -274,7 +274,7 @@ static int validate_link(struct hists *leader, struct hists *other)
return __validate_link(leader, 0) || __validate_link(other, 1);
}
-int test__hists_link(void)
+int test__hists_link(int subtest __maybe_unused)
{
int err = -1;
struct hists *hists, *first_hists;
@@ -294,7 +294,7 @@ int test__hists_link(void)
goto out;
/* default sort order (comm,dso,sym) will be used */
- if (setup_sorting() < 0)
+ if (setup_sorting(NULL) < 0)
goto out;
machines__init(&machines);
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index adbebc852cc8..ebe6cd485b5d 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -134,7 +134,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine)
field_order = NULL;
sort_order = NULL; /* equivalent to sort_order = "comm,dso,sym" */
- setup_sorting();
+ setup_sorting(NULL);
/*
* expected output:
@@ -236,7 +236,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine)
field_order = "overhead,cpu";
sort_order = "pid";
- setup_sorting();
+ setup_sorting(NULL);
/*
* expected output:
@@ -292,7 +292,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine)
field_order = "comm,overhead,dso";
sort_order = NULL;
- setup_sorting();
+ setup_sorting(NULL);
/*
* expected output:
@@ -366,7 +366,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine)
field_order = "dso,sym,comm,overhead,dso";
sort_order = "sym";
- setup_sorting();
+ setup_sorting(NULL);
/*
* expected output:
@@ -468,7 +468,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine)
field_order = "cpu,pid,comm,dso,sym";
sort_order = "dso,pid";
- setup_sorting();
+ setup_sorting(NULL);
/*
* expected output:
@@ -576,7 +576,7 @@ out:
return err;
}
-int test__hists_output(void)
+int test__hists_output(int subtest __maybe_unused)
{
int err = TEST_FAIL;
struct machines machines;
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index a2e2269aa093..ddb78fae064a 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -49,13 +49,12 @@ static int find_comm(struct perf_evlist *evlist, const char *comm)
* when an event is disabled but a dummy software event is not disabled. If the
* test passes %0 is returned, otherwise %-1 is returned.
*/
-int test__keep_tracking(void)
+int test__keep_tracking(int subtest __maybe_unused)
{
struct record_opts opts = {
.mmap_pages = UINT_MAX,
.user_freq = UINT_MAX,
.user_interval = ULLONG_MAX,
- .freq = 4000,
.target = {
.uses_mmap = true,
},
@@ -124,7 +123,7 @@ int test__keep_tracking(void)
evsel = perf_evlist__last(evlist);
- CHECK__(perf_evlist__disable_event(evlist, evsel));
+ CHECK__(perf_evsel__disable(evsel));
comm = "Test COMM 2";
CHECK__(prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0));
diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c
index 08c433b4bf4f..d2af78193153 100644
--- a/tools/perf/tests/kmod-path.c
+++ b/tools/perf/tests/kmod-path.c
@@ -49,7 +49,7 @@ static int test_is_kernel_module(const char *path, int cpumode, bool expect)
#define M(path, c, e) \
TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e))
-int test__kmod_path__parse(void)
+int test__kmod_path__parse(int subtest __maybe_unused)
{
/* path alloc_name alloc_ext kmod comp name ext */
T("/xxxx/xxxx/x-x.ko", true , true , true, false, "[x_x]", NULL);
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
index bc4cf507cde5..06f45c1d4256 100644
--- a/tools/perf/tests/llvm.c
+++ b/tools/perf/tests/llvm.c
@@ -44,13 +44,17 @@ static struct {
.source = test_llvm__bpf_test_kbuild_prog,
.desc = "Test kbuild searching",
},
+ [LLVM_TESTCASE_BPF_PROLOGUE] = {
+ .source = test_llvm__bpf_test_prologue_prog,
+ .desc = "Compile source for BPF prologue generation test",
+ },
};
int
test_llvm__fetch_bpf_obj(void **p_obj_buf,
size_t *p_obj_buf_sz,
- enum test_llvm__testcase index,
+ enum test_llvm__testcase idx,
bool force)
{
const char *source;
@@ -59,11 +63,11 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf,
char *tmpl_new = NULL, *clang_opt_new = NULL;
int err, old_verbose, ret = TEST_FAIL;
- if (index >= __LLVM_TESTCASE_MAX)
+ if (idx >= __LLVM_TESTCASE_MAX)
return TEST_FAIL;
- source = bpf_source_table[index].source;
- desc = bpf_source_table[index].desc;
+ source = bpf_source_table[idx].source;
+ desc = bpf_source_table[idx].desc;
perf_config(perf_config_cb, NULL);
@@ -127,44 +131,39 @@ out:
return ret;
}
-int test__llvm(void)
+int test__llvm(int subtest)
{
- enum test_llvm__testcase i;
+ int ret;
+ void *obj_buf = NULL;
+ size_t obj_buf_sz = 0;
- for (i = 0; i < __LLVM_TESTCASE_MAX; i++) {
- int ret;
- void *obj_buf = NULL;
- size_t obj_buf_sz = 0;
+ if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
+ return TEST_FAIL;
- ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
- i, false);
+ ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
+ subtest, false);
- if (ret == TEST_OK) {
- ret = test__bpf_parsing(obj_buf, obj_buf_sz);
- if (ret != TEST_OK)
- pr_debug("Failed to parse test case '%s'\n",
- bpf_source_table[i].desc);
- }
- free(obj_buf);
-
- switch (ret) {
- case TEST_SKIP:
- return TEST_SKIP;
- case TEST_OK:
- break;
- default:
- /*
- * Test 0 is the basic LLVM test. If test 0
- * fail, the basic LLVM support not functional
- * so the whole test should fail. If other test
- * case fail, it can be fixed by adjusting
- * config so don't report error.
- */
- if (i == 0)
- return TEST_FAIL;
- else
- return TEST_SKIP;
+ if (ret == TEST_OK) {
+ ret = test__bpf_parsing(obj_buf, obj_buf_sz);
+ if (ret != TEST_OK) {
+ pr_debug("Failed to parse test case '%s'\n",
+ bpf_source_table[subtest].desc);
}
}
- return TEST_OK;
+ free(obj_buf);
+
+ return ret;
+}
+
+int test__llvm_subtest_get_nr(void)
+{
+ return __LLVM_TESTCASE_MAX;
+}
+
+const char *test__llvm_subtest_get_desc(int subtest)
+{
+ if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
+ return NULL;
+
+ return bpf_source_table[subtest].desc;
}
diff --git a/tools/perf/tests/llvm.h b/tools/perf/tests/llvm.h
index d91d8f44efee..5150b4d6ef50 100644
--- a/tools/perf/tests/llvm.h
+++ b/tools/perf/tests/llvm.h
@@ -6,10 +6,12 @@
extern const char test_llvm__bpf_base_prog[];
extern const char test_llvm__bpf_test_kbuild_prog[];
+extern const char test_llvm__bpf_test_prologue_prog[];
enum test_llvm__testcase {
LLVM_TESTCASE_BASE,
LLVM_TESTCASE_KBUILD,
+ LLVM_TESTCASE_BPF_PROLOGUE,
__LLVM_TESTCASE_MAX,
};
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index 8ea3dffc5065..c1fbb8e884c0 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -259,7 +259,8 @@ $(run_O):
tarpkg:
@cmd="$(PERF)/tests/perf-targz-src-pkg $(PERF)"; \
echo "- $@: $$cmd" && echo $$cmd > $@ && \
- ( eval $$cmd ) >> $@ 2>&1
+ ( eval $$cmd ) >> $@ 2>&1 && \
+ rm -f $@
make_kernelsrc:
@echo "- make -C <kernelsrc> tools/perf"
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 4495493c9431..359e98fcd94c 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -16,7 +16,7 @@
* Then it checks if the number of syscalls reported as perf events by
* the kernel corresponds to the number of syscalls made.
*/
-int test__basic_mmap(void)
+int test__basic_mmap(int subtest __maybe_unused)
{
int err = -1;
union perf_event *event;
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 145050e2e544..0c5ce44f723f 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -149,7 +149,6 @@ static int synth_process(struct machine *machine)
static int mmap_events(synth_cb synth)
{
- struct machines machines;
struct machine *machine;
int err, i;
@@ -162,8 +161,7 @@ static int mmap_events(synth_cb synth)
*/
TEST_ASSERT_VAL("failed to create threads", !threads_create());
- machines__init(&machines);
- machine = &machines.host;
+ machine = machine__new_host();
dump_trace = verbose > 1 ? 1 : 0;
@@ -203,7 +201,7 @@ static int mmap_events(synth_cb synth)
}
machine__delete_threads(machine);
- machines__exit(&machines);
+ machine__delete(machine);
return err;
}
@@ -221,7 +219,7 @@ static int mmap_events(synth_cb synth)
*
* by using all thread objects.
*/
-int test__mmap_thread_lookup(void)
+int test__mmap_thread_lookup(int subtest __maybe_unused)
{
/* perf_event__synthesize_threads synthesize */
TEST_ASSERT_VAL("failed with sythesizing all",
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index 2006485a2859..53c2273e8859 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -7,7 +7,7 @@
#include "debug.h"
#include "stat.h"
-int test__openat_syscall_event_on_all_cpus(void)
+int test__openat_syscall_event_on_all_cpus(int subtest __maybe_unused)
{
int err = -1, fd, cpu;
struct cpu_map *cpus;
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index 5e811cd8f1c3..eb99a105f31c 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -6,7 +6,7 @@
#include "tests.h"
#include "debug.h"
-int test__syscall_openat_tp_fields(void)
+int test__syscall_openat_tp_fields(int subtest __maybe_unused)
{
struct record_opts opts = {
.target = {
diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c
index 033b54797b8a..1184f9ba6499 100644
--- a/tools/perf/tests/openat-syscall.c
+++ b/tools/perf/tests/openat-syscall.c
@@ -5,7 +5,7 @@
#include "debug.h"
#include "tests.h"
-int test__openat_syscall_event(void)
+int test__openat_syscall_event(int subtest __maybe_unused)
{
int err = -1, fd;
struct perf_evsel *evsel;
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 636d7b42d844..abe8849d1d70 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1765,7 +1765,7 @@ static void debug_warn(const char *warn, va_list params)
fprintf(stderr, " Warning: %s\n", msg);
}
-int test__parse_events(void)
+int test__parse_events(int subtest __maybe_unused)
{
int ret1, ret2 = 0;
diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c
index 2c63ea658541..294c76b01b41 100644
--- a/tools/perf/tests/parse-no-sample-id-all.c
+++ b/tools/perf/tests/parse-no-sample-id-all.c
@@ -67,7 +67,7 @@ struct test_attr_event {
*
* Return: %0 on success, %-1 if the test fails.
*/
-int test__parse_no_sample_id_all(void)
+int test__parse_no_sample_id_all(int subtest __maybe_unused)
{
int err;
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 7a228a2a070b..1cc78cefe399 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -32,7 +32,7 @@ realloc:
return cpu;
}
-int test__PERF_RECORD(void)
+int test__PERF_RECORD(int subtest __maybe_unused)
{
struct record_opts opts = {
.target = {
@@ -40,12 +40,11 @@ int test__PERF_RECORD(void)
.uses_mmap = true,
},
.no_buffering = true,
- .freq = 10,
.mmap_pages = 256,
};
cpu_set_t cpu_mask;
size_t cpu_mask_size = sizeof(cpu_mask);
- struct perf_evlist *evlist = perf_evlist__new_default();
+ struct perf_evlist *evlist = perf_evlist__new_dummy();
struct perf_evsel *evsel;
struct perf_sample sample;
const char *cmd = "sleep";
@@ -61,6 +60,9 @@ int test__PERF_RECORD(void)
int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
char sbuf[STRERR_BUFSIZE];
+ if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
+ evlist = perf_evlist__new_default();
+
if (evlist == NULL || argv == NULL) {
pr_debug("Not enough memory to create evlist\n");
goto out;
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index faa04e9d5d5f..1e2ba2602930 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -133,7 +133,7 @@ static struct list_head *test_terms_list(void)
return &terms;
}
-int test__pmu(void)
+int test__pmu(int subtest __maybe_unused)
{
char *format = test_format_dir_get();
LIST_HEAD(formats);
diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c
index 7760277c6def..7a52834ee0d0 100644
--- a/tools/perf/tests/python-use.c
+++ b/tools/perf/tests/python-use.c
@@ -4,11 +4,12 @@
#include <stdio.h>
#include <stdlib.h>
+#include <linux/compiler.h>
#include "tests.h"
extern int verbose;
-int test__python_use(void)
+int test__python_use(int subtest __maybe_unused)
{
char *cmd;
int ret;
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 30c02181e78b..5f23710b9fee 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -290,7 +290,7 @@ out_free:
* checks sample format bits separately and together. If the test passes %0 is
* returned, otherwise %-1 is returned.
*/
-int test__sample_parsing(void)
+int test__sample_parsing(int subtest __maybe_unused)
{
const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15};
u64 sample_type;
diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c
new file mode 100644
index 000000000000..6a20ff2326bb
--- /dev/null
+++ b/tools/perf/tests/stat.c
@@ -0,0 +1,111 @@
+#include <linux/compiler.h>
+#include "event.h"
+#include "tests.h"
+#include "stat.h"
+#include "counts.h"
+#include "debug.h"
+
+static bool has_term(struct stat_config_event *config,
+ u64 tag, u64 val)
+{
+ unsigned i;
+
+ for (i = 0; i < config->nr; i++) {
+ if ((config->data[i].tag == tag) &&
+ (config->data[i].val == val))
+ return true;
+ }
+
+ return false;
+}
+
+static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct stat_config_event *config = &event->stat_config;
+ struct perf_stat_config stat_config;
+
+#define HAS(term, val) \
+ has_term(config, PERF_STAT_CONFIG_TERM__##term, val)
+
+ TEST_ASSERT_VAL("wrong nr", config->nr == PERF_STAT_CONFIG_TERM__MAX);
+ TEST_ASSERT_VAL("wrong aggr_mode", HAS(AGGR_MODE, AGGR_CORE));
+ TEST_ASSERT_VAL("wrong scale", HAS(SCALE, 1));
+ TEST_ASSERT_VAL("wrong interval", HAS(INTERVAL, 1));
+
+#undef HAS
+
+ perf_event__read_stat_config(&stat_config, config);
+
+ TEST_ASSERT_VAL("wrong aggr_mode", stat_config.aggr_mode == AGGR_CORE);
+ TEST_ASSERT_VAL("wrong scale", stat_config.scale == 1);
+ TEST_ASSERT_VAL("wrong interval", stat_config.interval == 1);
+ return 0;
+}
+
+int test__synthesize_stat_config(int subtest __maybe_unused)
+{
+ struct perf_stat_config stat_config = {
+ .aggr_mode = AGGR_CORE,
+ .scale = 1,
+ .interval = 1,
+ };
+
+ TEST_ASSERT_VAL("failed to synthesize stat_config",
+ !perf_event__synthesize_stat_config(NULL, &stat_config, process_stat_config_event, NULL));
+
+ return 0;
+}
+
+static int process_stat_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct stat_event *st = &event->stat;
+
+ TEST_ASSERT_VAL("wrong cpu", st->cpu == 1);
+ TEST_ASSERT_VAL("wrong thread", st->thread == 2);
+ TEST_ASSERT_VAL("wrong id", st->id == 3);
+ TEST_ASSERT_VAL("wrong val", st->val == 100);
+ TEST_ASSERT_VAL("wrong run", st->ena == 200);
+ TEST_ASSERT_VAL("wrong ena", st->run == 300);
+ return 0;
+}
+
+int test__synthesize_stat(int subtest __maybe_unused)
+{
+ struct perf_counts_values count;
+
+ count.val = 100;
+ count.ena = 200;
+ count.run = 300;
+
+ TEST_ASSERT_VAL("failed to synthesize stat_config",
+ !perf_event__synthesize_stat(NULL, 1, 2, 3, &count, process_stat_event, NULL));
+
+ return 0;
+}
+
+static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct stat_round_event *stat_round = &event->stat_round;
+
+ TEST_ASSERT_VAL("wrong time", stat_round->time == 0xdeadbeef);
+ TEST_ASSERT_VAL("wrong type", stat_round->type == PERF_STAT_ROUND_TYPE__INTERVAL);
+ return 0;
+}
+
+int test__synthesize_stat_round(int subtest __maybe_unused)
+{
+ TEST_ASSERT_VAL("failed to synthesize stat_config",
+ !perf_event__synthesize_stat_round(NULL, 0xdeadbeef, PERF_STAT_ROUND_TYPE__INTERVAL,
+ process_stat_round_event, NULL));
+
+ return 0;
+}
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index 5b83f56a3b6f..36e8ce1550e3 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -122,7 +122,7 @@ out_delete_evlist:
return err;
}
-int test__sw_clock_freq(void)
+int test__sw_clock_freq(int subtest __maybe_unused)
{
int ret;
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index a02af503100c..ebd80168d51e 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -305,7 +305,7 @@ out_free_nodes:
* evsel->system_wide and evsel->tracking flags (respectively) with other events
* sometimes enabled or disabled.
*/
-int test__switch_tracking(void)
+int test__switch_tracking(int subtest __maybe_unused)
{
const char *sched_switch = "sched:sched_switch";
struct switch_tracking switch_tracking = { .tids = NULL, };
@@ -455,7 +455,7 @@ int test__switch_tracking(void)
perf_evlist__enable(evlist);
- err = perf_evlist__disable_event(evlist, cpu_clocks_evsel);
+ err = perf_evsel__disable(cpu_clocks_evsel);
if (err) {
pr_debug("perf_evlist__disable_event failed!\n");
goto out_err;
@@ -474,7 +474,7 @@ int test__switch_tracking(void)
goto out_err;
}
- err = perf_evlist__disable_event(evlist, cycles_evsel);
+ err = perf_evsel__disable(cycles_evsel);
if (err) {
pr_debug("perf_evlist__disable_event failed!\n");
goto out_err;
@@ -500,7 +500,7 @@ int test__switch_tracking(void)
goto out_err;
}
- err = perf_evlist__enable_event(evlist, cycles_evsel);
+ err = perf_evsel__enable(cycles_evsel);
if (err) {
pr_debug("perf_evlist__disable_event failed!\n");
goto out_err;
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index add16385f13e..2dfff7ac8ef3 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -31,7 +31,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
* if the number of exit events reported by the kernel is 1 or not
* in order to check that the kernel returns the correct number of events.
*/
-int test__task_exit(void)
+int test__task_exit(int subtest __maybe_unused)
{
int err = -1;
union perf_event *event;
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 3c8734a3abbc..82b2b5e6ba7c 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -1,6 +1,8 @@
#ifndef TESTS_H
#define TESTS_H
+#include <stdbool.h>
+
#define TEST_ASSERT_VAL(text, cond) \
do { \
if (!(cond)) { \
@@ -26,48 +28,63 @@ enum {
struct test {
const char *desc;
- int (*func)(void);
+ int (*func)(int subtest);
+ struct {
+ bool skip_if_fail;
+ int (*get_nr)(void);
+ const char *(*get_desc)(int subtest);
+ } subtest;
};
/* Tests */
-int test__vmlinux_matches_kallsyms(void);
-int test__openat_syscall_event(void);
-int test__openat_syscall_event_on_all_cpus(void);
-int test__basic_mmap(void);
-int test__PERF_RECORD(void);
-int test__perf_evsel__roundtrip_name_test(void);
-int test__perf_evsel__tp_sched_test(void);
-int test__syscall_openat_tp_fields(void);
-int test__pmu(void);
-int test__attr(void);
-int test__dso_data(void);
-int test__dso_data_cache(void);
-int test__dso_data_reopen(void);
-int test__parse_events(void);
-int test__hists_link(void);
-int test__python_use(void);
-int test__bp_signal(void);
-int test__bp_signal_overflow(void);
-int test__task_exit(void);
-int test__sw_clock_freq(void);
-int test__code_reading(void);
-int test__sample_parsing(void);
-int test__keep_tracking(void);
-int test__parse_no_sample_id_all(void);
-int test__dwarf_unwind(void);
-int test__hists_filter(void);
-int test__mmap_thread_lookup(void);
-int test__thread_mg_share(void);
-int test__hists_output(void);
-int test__hists_cumulate(void);
-int test__switch_tracking(void);
-int test__fdarray__filter(void);
-int test__fdarray__add(void);
-int test__kmod_path__parse(void);
-int test__thread_map(void);
-int test__llvm(void);
-int test__bpf(void);
-int test_session_topology(void);
+int test__vmlinux_matches_kallsyms(int subtest);
+int test__openat_syscall_event(int subtest);
+int test__openat_syscall_event_on_all_cpus(int subtest);
+int test__basic_mmap(int subtest);
+int test__PERF_RECORD(int subtest);
+int test__perf_evsel__roundtrip_name_test(int subtest);
+int test__perf_evsel__tp_sched_test(int subtest);
+int test__syscall_openat_tp_fields(int subtest);
+int test__pmu(int subtest);
+int test__attr(int subtest);
+int test__dso_data(int subtest);
+int test__dso_data_cache(int subtest);
+int test__dso_data_reopen(int subtest);
+int test__parse_events(int subtest);
+int test__hists_link(int subtest);
+int test__python_use(int subtest);
+int test__bp_signal(int subtest);
+int test__bp_signal_overflow(int subtest);
+int test__task_exit(int subtest);
+int test__sw_clock_freq(int subtest);
+int test__code_reading(int subtest);
+int test__sample_parsing(int subtest);
+int test__keep_tracking(int subtest);
+int test__parse_no_sample_id_all(int subtest);
+int test__dwarf_unwind(int subtest);
+int test__hists_filter(int subtest);
+int test__mmap_thread_lookup(int subtest);
+int test__thread_mg_share(int subtest);
+int test__hists_output(int subtest);
+int test__hists_cumulate(int subtest);
+int test__switch_tracking(int subtest);
+int test__fdarray__filter(int subtest);
+int test__fdarray__add(int subtest);
+int test__kmod_path__parse(int subtest);
+int test__thread_map(int subtest);
+int test__llvm(int subtest);
+const char *test__llvm_subtest_get_desc(int subtest);
+int test__llvm_subtest_get_nr(void);
+int test__bpf(int subtest);
+const char *test__bpf_subtest_get_desc(int subtest);
+int test__bpf_subtest_get_nr(void);
+int test_session_topology(int subtest);
+int test__thread_map_synthesize(int subtest);
+int test__cpu_map_synthesize(int subtest);
+int test__synthesize_stat_config(int subtest);
+int test__synthesize_stat(int subtest);
+int test__synthesize_stat_round(int subtest);
+int test__event_update(int subtest);
#if defined(__arm__) || defined(__aarch64__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT
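A sketch of how the new subtest hooks are meant to be wired up; the struct test fields come from the hunk above, while the tests[] table in builtin-test.c is assumed and is not part of this diff:

	/*
	 * Illustrative entry only: the harness can call
	 * subtest.get_nr() to learn how many cases exist and
	 * subtest.get_desc(i) to label each one, passing i as
	 * the 'subtest' argument of .func.
	 */
	static struct test llvm_test_entry = {
		.desc = "Test LLVM searching and compiling",
		.func = test__llvm,
		.subtest = {
			.skip_if_fail	= true,
			.get_nr		= test__llvm_subtest_get_nr,
			.get_desc	= test__llvm_subtest_get_desc,
		},
	};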
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
index 138a0e3431fa..fccde848fe9c 100644
--- a/tools/perf/tests/thread-map.c
+++ b/tools/perf/tests/thread-map.c
@@ -4,7 +4,7 @@
#include "thread_map.h"
#include "debug.h"
-int test__thread_map(void)
+int test__thread_map(int subtest __maybe_unused)
{
struct thread_map *map;
@@ -40,3 +40,46 @@ int test__thread_map(void)
thread_map__put(map);
return 0;
}
+
+static int process_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct thread_map_event *map = &event->thread_map;
+ struct thread_map *threads;
+
+ TEST_ASSERT_VAL("wrong nr", map->nr == 1);
+ TEST_ASSERT_VAL("wrong pid", map->entries[0].pid == (u64) getpid());
+ TEST_ASSERT_VAL("wrong comm", !strcmp(map->entries[0].comm, "perf"));
+
+ threads = thread_map__new_event(&event->thread_map);
+ TEST_ASSERT_VAL("failed to alloc map", threads);
+
+ TEST_ASSERT_VAL("wrong nr", threads->nr == 1);
+ TEST_ASSERT_VAL("wrong pid",
+ thread_map__pid(threads, 0) == getpid());
+ TEST_ASSERT_VAL("wrong comm",
+ thread_map__comm(threads, 0) &&
+ !strcmp(thread_map__comm(threads, 0), "perf"));
+ TEST_ASSERT_VAL("wrong refcnt",
+ atomic_read(&threads->refcnt) == 1);
+ thread_map__put(threads);
+ return 0;
+}
+
+int test__thread_map_synthesize(int subtest __maybe_unused)
+{
+ struct thread_map *threads;
+
+ /* test map on current pid */
+ threads = thread_map__new_by_pid(getpid());
+ TEST_ASSERT_VAL("failed to alloc map", threads);
+
+ thread_map__read_comms(threads);
+
+ TEST_ASSERT_VAL("failed to synthesize map",
+ !perf_event__synthesize_thread_map2(NULL, threads, process_event, NULL));
+
+ return 0;
+}
diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c
index 01fabb19d746..188b63140fc8 100644
--- a/tools/perf/tests/thread-mg-share.c
+++ b/tools/perf/tests/thread-mg-share.c
@@ -4,7 +4,7 @@
#include "map.h"
#include "debug.h"
-int test__thread_mg_share(void)
+int test__thread_mg_share(int subtest __maybe_unused)
{
struct machines machines;
struct machine *machine;
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index f5bb096c3bd9..98fe69ac553c 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -84,7 +84,7 @@ static int check_cpu_topology(char *path, struct cpu_map *map)
return 0;
}
-int test_session_topology(void)
+int test_session_topology(int subtest __maybe_unused)
{
char path[PATH_MAX];
struct cpu_map *map;
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index d677e018e504..f0bfc9e8fd9f 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -18,7 +18,7 @@ static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused,
#define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x))
-int test__vmlinux_matches_kallsyms(void)
+int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
{
int err = -1;
struct rb_node *nd;
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index e9703c0829f1..d37202121689 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -528,7 +528,7 @@ static struct ui_browser_colorset {
.colorset = HE_COLORSET_SELECTED,
.name = "selected",
.fg = "black",
- .bg = "lightgray",
+ .bg = "yellow",
},
{
.colorset = HE_COLORSET_CODE,
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index e5afb8936040..901d481e6cea 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -178,12 +178,51 @@ static int callchain_node__count_rows_rb_tree(struct callchain_node *node)
return n;
}
+static int callchain_node__count_flat_rows(struct callchain_node *node)
+{
+ struct callchain_list *chain;
+ char folded_sign = 0;
+ int n = 0;
+
+ list_for_each_entry(chain, &node->parent_val, list) {
+ if (!folded_sign) {
+ /* only check first chain list entry */
+ folded_sign = callchain_list__folded(chain);
+ if (folded_sign == '+')
+ return 1;
+ }
+ n++;
+ }
+
+ list_for_each_entry(chain, &node->val, list) {
+ if (!folded_sign) {
+ /* node->parent_val list might be empty */
+ folded_sign = callchain_list__folded(chain);
+ if (folded_sign == '+')
+ return 1;
+ }
+ n++;
+ }
+
+ return n;
+}
+
+static int callchain_node__count_folded_rows(struct callchain_node *node __maybe_unused)
+{
+ return 1;
+}
+
static int callchain_node__count_rows(struct callchain_node *node)
{
struct callchain_list *chain;
bool unfolded = false;
int n = 0;
+ if (callchain_param.mode == CHAIN_FLAT)
+ return callchain_node__count_flat_rows(node);
+ else if (callchain_param.mode == CHAIN_FOLDED)
+ return callchain_node__count_folded_rows(node);
+
list_for_each_entry(chain, &node->val, list) {
++n;
unfolded = chain->unfolded;
@@ -263,7 +302,7 @@ static void callchain_node__init_have_children(struct callchain_node *node,
chain = list_entry(node->val.next, struct callchain_list, list);
chain->has_children = has_sibling;
- if (!list_empty(&node->val)) {
+ if (node->val.next != node->val.prev) {
chain = list_entry(node->val.prev, struct callchain_list, list);
chain->has_children = !RB_EMPTY_ROOT(&node->rb_root);
}
@@ -279,6 +318,9 @@ static void callchain__init_have_children(struct rb_root *root)
for (nd = rb_first(root); nd; nd = rb_next(nd)) {
struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node);
callchain_node__init_have_children(node, has_sibling);
+ if (callchain_param.mode == CHAIN_FLAT ||
+ callchain_param.mode == CHAIN_FOLDED)
+ callchain_node__make_parent_list(node);
}
}
@@ -298,6 +340,9 @@ static bool hist_browser__toggle_fold(struct hist_browser *browser)
struct callchain_list *cl = container_of(ms, struct callchain_list, ms);
bool has_children;
+ if (!he || !ms)
+ return false;
+
if (ms == &he->ms)
has_children = hist_entry__toggle_fold(he);
else
@@ -574,6 +619,231 @@ static bool hist_browser__check_dump_full(struct hist_browser *browser __maybe_u
#define LEVEL_OFFSET_STEP 3
+static int hist_browser__show_callchain_list(struct hist_browser *browser,
+ struct callchain_node *node,
+ struct callchain_list *chain,
+ unsigned short row, u64 total,
+ bool need_percent, int offset,
+ print_callchain_entry_fn print,
+ struct callchain_print_arg *arg)
+{
+ char bf[1024], *alloc_str;
+ const char *str;
+
+ if (arg->row_offset != 0) {
+ arg->row_offset--;
+ return 0;
+ }
+
+ alloc_str = NULL;
+ str = callchain_list__sym_name(chain, bf, sizeof(bf),
+ browser->show_dso);
+
+ if (need_percent) {
+ char buf[64];
+
+ callchain_node__scnprintf_value(node, buf, sizeof(buf),
+ total);
+
+ if (asprintf(&alloc_str, "%s %s", buf, str) < 0)
+ str = "Not enough memory!";
+ else
+ str = alloc_str;
+ }
+
+ print(browser, chain, str, offset, row, arg);
+
+ free(alloc_str);
+ return 1;
+}
+
+static int hist_browser__show_callchain_flat(struct hist_browser *browser,
+ struct rb_root *root,
+ unsigned short row, u64 total,
+ print_callchain_entry_fn print,
+ struct callchain_print_arg *arg,
+ check_output_full_fn is_output_full)
+{
+ struct rb_node *node;
+ int first_row = row, offset = LEVEL_OFFSET_STEP;
+ bool need_percent;
+
+ node = rb_first(root);
+ need_percent = node && rb_next(node);
+
+ while (node) {
+ struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
+ struct rb_node *next = rb_next(node);
+ struct callchain_list *chain;
+ char folded_sign = ' ';
+ int first = true;
+ int extra_offset = 0;
+
+ list_for_each_entry(chain, &child->parent_val, list) {
+ bool was_first = first;
+
+ if (first)
+ first = false;
+ else if (need_percent)
+ extra_offset = LEVEL_OFFSET_STEP;
+
+ folded_sign = callchain_list__folded(chain);
+
+ row += hist_browser__show_callchain_list(browser, child,
+ chain, row, total,
+ was_first && need_percent,
+ offset + extra_offset,
+ print, arg);
+
+ if (is_output_full(browser, row))
+ goto out;
+
+ if (folded_sign == '+')
+ goto next;
+ }
+
+ list_for_each_entry(chain, &child->val, list) {
+ bool was_first = first;
+
+ if (first)
+ first = false;
+ else if (need_percent)
+ extra_offset = LEVEL_OFFSET_STEP;
+
+ folded_sign = callchain_list__folded(chain);
+
+ row += hist_browser__show_callchain_list(browser, child,
+ chain, row, total,
+ was_first && need_percent,
+ offset + extra_offset,
+ print, arg);
+
+ if (is_output_full(browser, row))
+ goto out;
+
+ if (folded_sign == '+')
+ break;
+ }
+
+next:
+ if (is_output_full(browser, row))
+ break;
+ node = next;
+ }
+out:
+ return row - first_row;
+}
+
+static char *hist_browser__folded_callchain_str(struct hist_browser *browser,
+ struct callchain_list *chain,
+ char *value_str, char *old_str)
+{
+ char bf[1024];
+ const char *str;
+ char *new;
+
+ str = callchain_list__sym_name(chain, bf, sizeof(bf),
+ browser->show_dso);
+ if (old_str) {
+ if (asprintf(&new, "%s%s%s", old_str,
+ symbol_conf.field_sep ?: ";", str) < 0)
+ new = NULL;
+ } else {
+ if (value_str) {
+ if (asprintf(&new, "%s %s", value_str, str) < 0)
+ new = NULL;
+ } else {
+ if (asprintf(&new, "%s", str) < 0)
+ new = NULL;
+ }
+ }
+ return new;
+}
+
+static int hist_browser__show_callchain_folded(struct hist_browser *browser,
+ struct rb_root *root,
+ unsigned short row, u64 total,
+ print_callchain_entry_fn print,
+ struct callchain_print_arg *arg,
+ check_output_full_fn is_output_full)
+{
+ struct rb_node *node;
+ int first_row = row, offset = LEVEL_OFFSET_STEP;
+ bool need_percent;
+
+ node = rb_first(root);
+ need_percent = node && rb_next(node);
+
+ while (node) {
+ struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
+ struct rb_node *next = rb_next(node);
+ struct callchain_list *chain, *first_chain = NULL;
+ int first = true;
+ char *value_str = NULL, *value_str_alloc = NULL;
+ char *chain_str = NULL, *chain_str_alloc = NULL;
+
+ if (arg->row_offset != 0) {
+ arg->row_offset--;
+ goto next;
+ }
+
+ if (need_percent) {
+ char buf[64];
+
+ callchain_node__scnprintf_value(child, buf, sizeof(buf), total);
+ if (asprintf(&value_str, "%s", buf) < 0) {
+ value_str = (char *)"<...>";
+ goto do_print;
+ }
+ value_str_alloc = value_str;
+ }
+
+ list_for_each_entry(chain, &child->parent_val, list) {
+ chain_str = hist_browser__folded_callchain_str(browser,
+ chain, value_str, chain_str);
+ if (first) {
+ first = false;
+ first_chain = chain;
+ }
+
+ if (chain_str == NULL) {
+ chain_str = (char *)"Not enough memory!";
+ goto do_print;
+ }
+
+ chain_str_alloc = chain_str;
+ }
+
+ list_for_each_entry(chain, &child->val, list) {
+ chain_str = hist_browser__folded_callchain_str(browser,
+ chain, value_str, chain_str);
+ if (first) {
+ first = false;
+ first_chain = chain;
+ }
+
+ if (chain_str == NULL) {
+ chain_str = (char *)"Not enough memory!";
+ goto do_print;
+ }
+
+ chain_str_alloc = chain_str;
+ }
+
+do_print:
+ print(browser, first_chain, chain_str, offset, row++, arg);
+ free(value_str_alloc);
+ free(chain_str_alloc);
+
+next:
+ if (is_output_full(browser, row))
+ break;
+ node = next;
+ }
+
+ return row - first_row;
+}
+
static int hist_browser__show_callchain(struct hist_browser *browser,
struct rb_root *root, int level,
unsigned short row, u64 total,
@@ -592,15 +862,12 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
while (node) {
struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
struct rb_node *next = rb_next(node);
- u64 cumul = callchain_cumul_hits(child);
struct callchain_list *chain;
char folded_sign = ' ';
int first = true;
int extra_offset = 0;
list_for_each_entry(chain, &child->val, list) {
- char bf[1024], *alloc_str;
- const char *str;
bool was_first = first;
if (first)
@@ -609,31 +876,16 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
extra_offset = LEVEL_OFFSET_STEP;
folded_sign = callchain_list__folded(chain);
- if (arg->row_offset != 0) {
- arg->row_offset--;
- goto do_next;
- }
-
- alloc_str = NULL;
- str = callchain_list__sym_name(chain, bf, sizeof(bf),
- browser->show_dso);
-
- if (was_first && need_percent) {
- double percent = cumul * 100.0 / total;
- if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0)
- str = "Not enough memory!";
- else
- str = alloc_str;
- }
-
- print(browser, chain, str, offset + extra_offset, row, arg);
+ row += hist_browser__show_callchain_list(browser, child,
+ chain, row, total,
+ was_first && need_percent,
+ offset + extra_offset,
+ print, arg);
- free(alloc_str);
-
- if (is_output_full(browser, ++row))
+ if (is_output_full(browser, row))
goto out;
-do_next:
+
if (folded_sign == '+')
break;
}
@@ -789,7 +1041,8 @@ static int hist_browser__show_entry(struct hist_browser *browser,
hist_browser__gotorc(browser, row, 0);
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt) || column++ < browser->b.horiz_scroll)
+ if (perf_hpp__should_skip(fmt, entry->hists) ||
+ column++ < browser->b.horiz_scroll)
continue;
if (current_entry && browser->b.navkeypressed) {
@@ -844,10 +1097,22 @@ static int hist_browser__show_entry(struct hist_browser *browser,
total = entry->stat.period;
}
- printed += hist_browser__show_callchain(browser,
+ if (callchain_param.mode == CHAIN_FLAT) {
+ printed += hist_browser__show_callchain_flat(browser,
+ &entry->sorted_chain, row, total,
+ hist_browser__show_callchain_entry, &arg,
+ hist_browser__check_output_full);
+ } else if (callchain_param.mode == CHAIN_FOLDED) {
+ printed += hist_browser__show_callchain_folded(browser,
+ &entry->sorted_chain, row, total,
+ hist_browser__show_callchain_entry, &arg,
+ hist_browser__check_output_full);
+ } else {
+ printed += hist_browser__show_callchain(browser,
&entry->sorted_chain, 1, row, total,
hist_browser__show_callchain_entry, &arg,
hist_browser__check_output_full);
+ }
if (arg.is_current_entry)
browser->he_selection = entry;
@@ -880,7 +1145,7 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char *
}
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt) || column++ < browser->b.horiz_scroll)
+ if (perf_hpp__should_skip(fmt, hists) || column++ < browser->b.horiz_scroll)
continue;
ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
@@ -928,6 +1193,8 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
}
ui_browser__hists_init_top(browser);
+ hb->he_selection = NULL;
+ hb->selection = NULL;
for (nd = browser->top; nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
@@ -1033,6 +1300,9 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
* and stop when we printed enough lines to fill the screen.
*/
do_offset:
+ if (!nd)
+ return;
+
if (offset > 0) {
do {
h = rb_entry(nd, struct hist_entry, rb_node);
@@ -1145,7 +1415,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
printed += fprintf(fp, "%c ", folded_sign);
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, he->hists))
continue;
if (!first) {
@@ -1430,7 +1700,6 @@ close_file_and_continue:
struct popup_action {
struct thread *thread;
- struct dso *dso;
struct map_symbol ms;
int socket;
@@ -1565,7 +1834,6 @@ add_dso_opt(struct hist_browser *browser, struct popup_action *act,
return 0;
act->ms.map = map;
- act->dso = map->dso;
act->fn = do_zoom_dso;
return 1;
}
@@ -1796,10 +2064,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
SLang_reset_tty();
SLang_init_tty(0, 0, 0);
- if (min_pcnt) {
+ if (min_pcnt)
browser->min_pcnt = min_pcnt;
- hist_browser__update_nr_entries(browser);
- }
+ hist_browser__update_nr_entries(browser);
browser->pstack = pstack__new(3);
if (browser->pstack == NULL)
@@ -1827,7 +2094,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
while (1) {
struct thread *thread = NULL;
- struct dso *dso = NULL;
struct map *map = NULL;
int choice = 0;
int socked_id = -1;
@@ -1839,8 +2105,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
if (browser->he_selection != NULL) {
thread = hist_browser__selected_thread(browser);
map = browser->selection->map;
- if (map)
- dso = map->dso;
socked_id = browser->he_selection->socket;
}
switch (key) {
@@ -1874,7 +2138,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
hist_browser__dump(browser);
continue;
case 'd':
- actions->dso = dso;
+ actions->ms.map = map;
do_zoom_dso(browser, actions);
continue;
case 'V':
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 4b3585eed1e8..0f8dcfdfb10f 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -89,8 +89,8 @@ void perf_gtk__init_hpp(void)
perf_gtk__hpp_color_overhead_acc;
}
-static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
- GtkTreeIter *parent, int col, u64 total)
+static void perf_gtk__add_callchain_flat(struct rb_root *root, GtkTreeStore *store,
+ GtkTreeIter *parent, int col, u64 total)
{
struct rb_node *nd;
bool has_single_node = (rb_first(root) == rb_last(root));
@@ -100,13 +100,132 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
struct callchain_list *chain;
GtkTreeIter iter, new_parent;
bool need_new_parent;
- double percent;
- u64 hits, child_total;
node = rb_entry(nd, struct callchain_node, rb_node);
- hits = callchain_cumul_hits(node);
- percent = 100.0 * hits / total;
+ new_parent = *parent;
+ need_new_parent = !has_single_node;
+
+ callchain_node__make_parent_list(node);
+
+ list_for_each_entry(chain, &node->parent_val, list) {
+ char buf[128];
+
+ gtk_tree_store_append(store, &iter, &new_parent);
+
+ callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
+ gtk_tree_store_set(store, &iter, 0, buf, -1);
+
+ callchain_list__sym_name(chain, buf, sizeof(buf), false);
+ gtk_tree_store_set(store, &iter, col, buf, -1);
+
+ if (need_new_parent) {
+ /*
+ * Only show the top-most symbol in a callchain
+ * if it's not the only callchain.
+ */
+ new_parent = iter;
+ need_new_parent = false;
+ }
+ }
+
+ list_for_each_entry(chain, &node->val, list) {
+ char buf[128];
+
+ gtk_tree_store_append(store, &iter, &new_parent);
+
+ callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
+ gtk_tree_store_set(store, &iter, 0, buf, -1);
+
+ callchain_list__sym_name(chain, buf, sizeof(buf), false);
+ gtk_tree_store_set(store, &iter, col, buf, -1);
+
+ if (need_new_parent) {
+ /*
+ * Only show the top-most symbol in a callchain
+ * if it's not the only callchain.
+ */
+ new_parent = iter;
+ need_new_parent = false;
+ }
+ }
+ }
+}
+
+static void perf_gtk__add_callchain_folded(struct rb_root *root, GtkTreeStore *store,
+ GtkTreeIter *parent, int col, u64 total)
+{
+ struct rb_node *nd;
+
+ for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ struct callchain_node *node;
+ struct callchain_list *chain;
+ GtkTreeIter iter;
+ char buf[64];
+ char *str, *str_alloc = NULL;
+ bool first = true;
+
+ node = rb_entry(nd, struct callchain_node, rb_node);
+
+ callchain_node__make_parent_list(node);
+
+ list_for_each_entry(chain, &node->parent_val, list) {
+ char name[1024];
+
+ callchain_list__sym_name(chain, name, sizeof(name), false);
+
+ if (asprintf(&str, "%s%s%s",
+ first ? "" : str_alloc,
+ first ? "" : symbol_conf.field_sep ?: "; ",
+ name) < 0)
+ return;
+
+ first = false;
+ free(str_alloc);
+ str_alloc = str;
+ }
+
+ list_for_each_entry(chain, &node->val, list) {
+ char name[1024];
+
+ callchain_list__sym_name(chain, name, sizeof(name), false);
+
+ if (asprintf(&str, "%s%s%s",
+ first ? "" : str_alloc,
+ first ? "" : symbol_conf.field_sep ?: "; ",
+ name) < 0)
+ return;
+
+ first = false;
+ free(str_alloc);
+ str_alloc = str;
+ }
+
+ gtk_tree_store_append(store, &iter, parent);
+
+ callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
+ gtk_tree_store_set(store, &iter, 0, buf, -1);
+
+ gtk_tree_store_set(store, &iter, col, str, -1);
+
+ free(str_alloc);
+ }
+}
+
+static void perf_gtk__add_callchain_graph(struct rb_root *root, GtkTreeStore *store,
+ GtkTreeIter *parent, int col, u64 total)
+{
+ struct rb_node *nd;
+ bool has_single_node = (rb_first(root) == rb_last(root));
+
+ for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ struct callchain_node *node;
+ struct callchain_list *chain;
+ GtkTreeIter iter, new_parent;
+ bool need_new_parent;
+ u64 child_total;
+
+ node = rb_entry(nd, struct callchain_node, rb_node);
new_parent = *parent;
need_new_parent = !has_single_node && (node->val_nr > 1);
@@ -116,7 +235,7 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
gtk_tree_store_append(store, &iter, &new_parent);
- scnprintf(buf, sizeof(buf), "%5.2f%%", percent);
+ callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
gtk_tree_store_set(store, &iter, 0, buf, -1);
callchain_list__sym_name(chain, buf, sizeof(buf), false);
@@ -138,11 +257,22 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
child_total = total;
/* Now 'iter' contains info of the last callchain_list */
- perf_gtk__add_callchain(&node->rb_root, store, &iter, col,
- child_total);
+ perf_gtk__add_callchain_graph(&node->rb_root, store, &iter, col,
+ child_total);
}
}
+static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
+ GtkTreeIter *parent, int col, u64 total)
+{
+ if (callchain_param.mode == CHAIN_FLAT)
+ perf_gtk__add_callchain_flat(root, store, parent, col, total);
+ else if (callchain_param.mode == CHAIN_FOLDED)
+ perf_gtk__add_callchain_folded(root, store, parent, col, total);
+ else
+ perf_gtk__add_callchain_graph(root, store, parent, col, total);
+}
+
static void on_row_activated(GtkTreeView *view, GtkTreePath *path,
GtkTreeViewColumn *col __maybe_unused,
gpointer user_data __maybe_unused)
@@ -188,7 +318,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
col_idx = 0;
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, hists))
continue;
/*
@@ -238,7 +368,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
col_idx = 0;
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, h->hists))
continue;
if (fmt->color)
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 5029ba2b55af..bf2a66e254ea 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -443,7 +443,6 @@ LIST_HEAD(perf_hpp__sort_list);
void perf_hpp__init(void)
{
- struct list_head *list;
int i;
for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
@@ -484,17 +483,6 @@ void perf_hpp__init(void)
if (symbol_conf.show_total_period)
hpp_dimension__add_output(PERF_HPP__PERIOD);
-
- /* prepend overhead field for backward compatiblity. */
- list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list;
- if (list_empty(list))
- list_add(list, &perf_hpp__sort_list);
-
- if (symbol_conf.cumulate_callchain) {
- list = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC].sort_list;
- if (list_empty(list))
- list_add(list, &perf_hpp__sort_list);
- }
}
void perf_hpp__column_register(struct perf_hpp_fmt *format)
@@ -619,7 +607,7 @@ unsigned int hists__sort_list_width(struct hists *hists)
struct perf_hpp dummy_hpp;
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, hists))
continue;
if (first)
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index dfcbc90146ef..387110d50b00 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -34,10 +34,10 @@ static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
return ret;
}
-static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
+static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
+ struct callchain_list *chain,
int depth, int depth_mask, int period,
- u64 total_samples, u64 hits,
- int left_margin)
+ u64 total_samples, int left_margin)
{
int i;
size_t ret = 0;
@@ -50,10 +50,9 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
else
ret += fprintf(fp, " ");
if (!period && i == depth - 1) {
- double percent;
-
- percent = hits * 100.0 / total_samples;
- ret += percent_color_fprintf(fp, "--%2.2f%%-- ", percent);
+ ret += fprintf(fp, "--");
+ ret += callchain_node__fprintf_value(node, fp, total_samples);
+ ret += fprintf(fp, "--");
} else
ret += fprintf(fp, "%s", " ");
}
@@ -82,13 +81,14 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
int depth_mask, int left_margin)
{
struct rb_node *node, *next;
- struct callchain_node *child;
+ struct callchain_node *child = NULL;
struct callchain_list *chain;
int new_depth_mask = depth_mask;
u64 remaining;
size_t ret = 0;
int i;
uint entries_printed = 0;
+ int cumul_count = 0;
remaining = total_samples;
@@ -100,6 +100,7 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
child = rb_entry(node, struct callchain_node, rb_node);
cumul = callchain_cumul_hits(child);
remaining -= cumul;
+ cumul_count += callchain_cumul_counts(child);
/*
* The depth mask manages the output of pipes that show
@@ -120,10 +121,9 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
left_margin);
i = 0;
list_for_each_entry(chain, &child->val, list) {
- ret += ipchain__fprintf_graph(fp, chain, depth,
+ ret += ipchain__fprintf_graph(fp, child, chain, depth,
new_depth_mask, i++,
total_samples,
- cumul,
left_margin);
}
@@ -143,14 +143,23 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
if (callchain_param.mode == CHAIN_GRAPH_REL &&
remaining && remaining != total_samples) {
+ struct callchain_node rem_node = {
+ .hit = remaining,
+ };
if (!rem_sq_bracket)
return ret;
+ if (callchain_param.value == CCVAL_COUNT && child && child->parent) {
+ rem_node.count = child->parent->children_count - cumul_count;
+ if (rem_node.count <= 0)
+ return ret;
+ }
+
new_depth_mask &= ~(1 << (depth - 1));
- ret += ipchain__fprintf_graph(fp, &rem_hits, depth,
+ ret += ipchain__fprintf_graph(fp, &rem_node, &rem_hits, depth,
new_depth_mask, 0, total_samples,
- remaining, left_margin);
+ left_margin);
}
return ret;
@@ -243,12 +252,11 @@ static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree,
struct rb_node *rb_node = rb_first(tree);
while (rb_node) {
- double percent;
-
chain = rb_entry(rb_node, struct callchain_node, rb_node);
- percent = chain->hit * 100.0 / total_samples;
- ret = percent_color_fprintf(fp, " %6.2f%%\n", percent);
+ ret += fprintf(fp, " ");
+ ret += callchain_node__fprintf_value(chain, fp, total_samples);
+ ret += fprintf(fp, "\n");
ret += __callchain__fprintf_flat(fp, chain, total_samples);
ret += fprintf(fp, "\n");
if (++entries_printed == callchain_param.print_limit)
@@ -260,6 +268,57 @@ static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree,
return ret;
}
+static size_t __callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
+{
+ const char *sep = symbol_conf.field_sep ?: ";";
+ struct callchain_list *chain;
+ size_t ret = 0;
+ char bf[1024];
+ bool first;
+
+ if (!node)
+ return 0;
+
+ ret += __callchain__fprintf_folded(fp, node->parent);
+
+ first = (ret == 0);
+ list_for_each_entry(chain, &node->val, list) {
+ if (chain->ip >= PERF_CONTEXT_MAX)
+ continue;
+ ret += fprintf(fp, "%s%s", first ? "" : sep,
+ callchain_list__sym_name(chain,
+ bf, sizeof(bf), false));
+ first = false;
+ }
+
+ return ret;
+}
+
+static size_t callchain__fprintf_folded(FILE *fp, struct rb_root *tree,
+ u64 total_samples)
+{
+ size_t ret = 0;
+ u32 entries_printed = 0;
+ struct callchain_node *chain;
+ struct rb_node *rb_node = rb_first(tree);
+
+ while (rb_node) {
+
+ chain = rb_entry(rb_node, struct callchain_node, rb_node);
+
+ ret += callchain_node__fprintf_value(chain, fp, total_samples);
+ ret += fprintf(fp, " ");
+ ret += __callchain__fprintf_folded(fp, chain);
+ ret += fprintf(fp, "\n");
+ if (++entries_printed == callchain_param.print_limit)
+ break;
+
+ rb_node = rb_next(rb_node);
+ }
+
+ return ret;
+}
+
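To make the folded format concrete: with symbol_conf.field_sep unset the separator falls back to ";", and each root node prints its value, a space, then the root-to-leaf symbols joined by the separator. A hypothetical line of output (percent value mode, symbol names made up):

	2.34% main;run_builtin;cmd_report;perf_session__process_events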
static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
u64 total_samples, int left_margin,
FILE *fp)
@@ -278,6 +337,9 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
case CHAIN_FLAT:
return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples);
break;
+ case CHAIN_FOLDED:
+ return callchain__fprintf_folded(fp, &he->sorted_chain, total_samples);
+ break;
case CHAIN_NONE:
break;
default:
@@ -323,7 +385,7 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
return 0;
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, he->hists))
continue;
/*
@@ -402,7 +464,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
fprintf(fp, "# ");
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, hists))
continue;
if (!first)
@@ -428,7 +490,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
perf_hpp__for_each_format(fmt) {
unsigned int i;
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, hists))
continue;
if (!first)
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 591b3fe3ed49..5eec53a3f4ac 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -6,24 +6,20 @@ libperf-y += config.o
libperf-y += ctype.o
libperf-y += db-export.o
libperf-y += env.o
-libperf-y += environment.o
libperf-y += event.o
libperf-y += evlist.o
libperf-y += evsel.o
-libperf-y += exec_cmd.o
-libperf-y += find_next_bit.o
-libperf-y += help.o
+libperf-y += find_bit.o
libperf-y += kallsyms.o
libperf-y += levenshtein.o
libperf-y += llvm-utils.o
-libperf-y += parse-options.o
libperf-y += parse-events.o
libperf-y += perf_regs.o
libperf-y += path.o
libperf-y += rbtree.o
+libperf-y += libstring.o
libperf-y += bitmap.o
libperf-y += hweight.o
-libperf-y += run-command.o
libperf-y += quote.o
libperf-y += strbuf.o
libperf-y += string.o
@@ -32,11 +28,9 @@ libperf-y += strfilter.o
libperf-y += top.o
libperf-y += usage.o
libperf-y += wrapper.o
-libperf-y += sigchain.o
libperf-y += dso.o
libperf-y += symbol.o
libperf-y += color.o
-libperf-y += pager.o
libperf-y += header.o
libperf-y += callchain.o
libperf-y += values.o
@@ -86,8 +80,11 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt.o
libperf-$(CONFIG_AUXTRACE) += intel-bts.o
libperf-y += parse-branch-options.o
libperf-y += parse-regs-options.o
+libperf-y += term.o
+libperf-y += help-unknown-cmd.o
libperf-$(CONFIG_LIBBPF) += bpf-loader.o
+libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
libperf-$(CONFIG_LIBELF) += symbol-elf.o
libperf-$(CONFIG_LIBELF) += probe-file.o
libperf-$(CONFIG_LIBELF) += probe-event.o
@@ -110,7 +107,6 @@ libperf-$(CONFIG_ZLIB) += zlib.o
libperf-$(CONFIG_LZMA) += lzma.o
CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
-CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))"
$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
$(call rule_mkdir)
@@ -136,8 +132,10 @@ CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
-CFLAGS_find_next_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_bitmap.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_parse-events.o += -Wno-redundant-decls
@@ -145,7 +143,11 @@ $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
-$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c FORCE
+$(OUTPUT)util/bitmap.o: ../lib/bitmap.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/find_bit.o: ../lib/find_bit.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
@@ -153,6 +155,10 @@ $(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
+$(OUTPUT)util/libstring.o: ../lib/string.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
$(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 1dd1949b0e79..b795b6994144 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -65,6 +65,11 @@ static int call__parse(struct ins_operands *ops)
name++;
+#ifdef __arm__
+ if (strchr(name, '+'))
+ return -1;
+#endif
+
tok = strchr(name, '>');
if (tok == NULL)
return -1;
@@ -246,7 +251,11 @@ static int mov__parse(struct ins_operands *ops)
return -1;
target = ++s;
+#ifdef __arm__
+ comment = strchr(s, ';');
+#else
comment = strchr(s, '#');
+#endif
if (comment != NULL)
s = comment - 1;
@@ -354,6 +363,20 @@ static struct ins instructions[] = {
{ .name = "addq", .ops = &mov_ops, },
{ .name = "addw", .ops = &mov_ops, },
{ .name = "and", .ops = &mov_ops, },
+#ifdef __arm__
+ { .name = "b", .ops = &jump_ops, }, // might also be a call
+ { .name = "bcc", .ops = &jump_ops, },
+ { .name = "bcs", .ops = &jump_ops, },
+ { .name = "beq", .ops = &jump_ops, },
+ { .name = "bge", .ops = &jump_ops, },
+ { .name = "bgt", .ops = &jump_ops, },
+ { .name = "bhi", .ops = &jump_ops, },
+ { .name = "bl", .ops = &call_ops, },
+ { .name = "blt", .ops = &jump_ops, },
+ { .name = "bls", .ops = &jump_ops, },
+ { .name = "blx", .ops = &call_ops, },
+ { .name = "bne", .ops = &jump_ops, },
+#endif
{ .name = "bts", .ops = &mov_ops, },
{ .name = "call", .ops = &call_ops, },
{ .name = "callq", .ops = &call_ops, },
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 7f10430af39c..360fda01f3b0 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -45,7 +45,7 @@
#include "event.h"
#include "session.h"
#include "debug.h"
-#include "parse-options.h"
+#include <subcmd/parse-options.h>
#include "intel-pt.h"
#include "intel-bts.h"
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 4c50411371db..540a7efa657e 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -5,11 +5,15 @@
* Copyright (C) 2015 Huawei Inc.
*/
+#include <linux/bpf.h>
#include <bpf/libbpf.h>
#include <linux/err.h>
+#include <linux/string.h>
#include "perf.h"
#include "debug.h"
#include "bpf-loader.h"
+#include "bpf-prologue.h"
+#include "llvm-utils.h"
#include "probe-event.h"
#include "probe-finder.h" // for MAX_PROBES
#include "llvm-utils.h"
@@ -32,6 +36,10 @@ DEFINE_PRINT_FN(debug, 1)
struct bpf_prog_priv {
struct perf_probe_event pev;
+ bool need_prologue;
+ struct bpf_insn *insns_buf;
+ int nr_types;
+ int *type_mapping;
};
static bool libbpf_initialized;
@@ -106,10 +114,178 @@ bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused,
struct bpf_prog_priv *priv = _priv;
cleanup_perf_probe_events(&priv->pev, 1);
+ zfree(&priv->insns_buf);
+ zfree(&priv->type_mapping);
free(priv);
}
static int
+prog_config__exec(const char *value, struct perf_probe_event *pev)
+{
+ pev->uprobes = true;
+ pev->target = strdup(value);
+ if (!pev->target)
+ return -ENOMEM;
+ return 0;
+}
+
+static int
+prog_config__module(const char *value, struct perf_probe_event *pev)
+{
+ pev->uprobes = false;
+ pev->target = strdup(value);
+ if (!pev->target)
+ return -ENOMEM;
+ return 0;
+}
+
+static int
+prog_config__bool(const char *value, bool *pbool, bool invert)
+{
+ int err;
+ bool bool_value;
+
+ if (!pbool)
+ return -EINVAL;
+
+ err = strtobool(value, &bool_value);
+ if (err)
+ return err;
+
+ *pbool = invert ? !bool_value : bool_value;
+ return 0;
+}
+
+static int
+prog_config__inlines(const char *value,
+ struct perf_probe_event *pev __maybe_unused)
+{
+ return prog_config__bool(value, &probe_conf.no_inlines, true);
+}
+
+static int
+prog_config__force(const char *value,
+ struct perf_probe_event *pev __maybe_unused)
+{
+ return prog_config__bool(value, &probe_conf.force_add, false);
+}
+
+static struct {
+ const char *key;
+ const char *usage;
+ const char *desc;
+ int (*func)(const char *, struct perf_probe_event *);
+} bpf_prog_config_terms[] = {
+ {
+ .key = "exec",
+ .usage = "exec=<full path of file>",
+ .desc = "Set uprobe target",
+ .func = prog_config__exec,
+ },
+ {
+ .key = "module",
+ .usage = "module=<module name> ",
+ .desc = "Set kprobe module",
+ .func = prog_config__module,
+ },
+ {
+ .key = "inlines",
+ .usage = "inlines=[yes|no] ",
+ .desc = "Probe at inline symbol",
+ .func = prog_config__inlines,
+ },
+ {
+ .key = "force",
+ .usage = "force=[yes|no] ",
+ .desc = "Forcibly add events with existing name",
+ .func = prog_config__force,
+ },
+};
+
+static int
+do_prog_config(const char *key, const char *value,
+ struct perf_probe_event *pev)
+{
+ unsigned int i;
+
+ pr_debug("config bpf program: %s=%s\n", key, value);
+ for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++)
+ if (strcmp(key, bpf_prog_config_terms[i].key) == 0)
+ return bpf_prog_config_terms[i].func(value, pev);
+
+ pr_debug("BPF: ERROR: invalid program config option: %s=%s\n",
+ key, value);
+
+ pr_debug("\nHint: Valid options are:\n");
+ for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++)
+ pr_debug("\t%s:\t%s\n", bpf_prog_config_terms[i].usage,
+ bpf_prog_config_terms[i].desc);
+ pr_debug("\n");
+
+ return -BPF_LOADER_ERRNO__PROGCONF_TERM;
+}
+
+static const char *
+parse_prog_config_kvpair(const char *config_str, struct perf_probe_event *pev)
+{
+ char *text = strdup(config_str);
+ char *sep, *line;
+ const char *main_str = NULL;
+ int err = 0;
+
+ if (!text) {
+ pr_debug("No enough memory: dup config_str failed\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ line = text;
+ while ((sep = strchr(line, ';'))) {
+ char *equ;
+
+ *sep = '\0';
+ equ = strchr(line, '=');
+ if (!equ) {
+ pr_warning("WARNING: invalid config in BPF object: %s\n",
+ line);
+ pr_warning("\tShould be 'key=value'.\n");
+ goto nextline;
+ }
+ *equ = '\0';
+
+ err = do_prog_config(line, equ + 1, pev);
+ if (err)
+ break;
+nextline:
+ line = sep + 1;
+ }
+
+ if (!err)
+ main_str = config_str + (line - text);
+ free(text);
+
+ return err ? ERR_PTR(err) : main_str;
+}
+
+static int
+parse_prog_config(const char *config_str, struct perf_probe_event *pev)
+{
+ int err;
+ const char *main_str = parse_prog_config_kvpair(config_str, pev);
+
+ if (IS_ERR(main_str))
+ return PTR_ERR(main_str);
+
+ err = parse_perf_probe_command(main_str, pev);
+ if (err < 0) {
+ pr_debug("bpf: '%s' is not a valid config string\n",
+ config_str);
+		/* parse failed, no need to clear pev. */
+ return -BPF_LOADER_ERRNO__CONFIG;
+ }
+ return 0;
+}
+
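For illustration, a config string this parser would accept, embedded as an ELF section name the way perf's BPF scriptlets are titled (the SEC() macro is defined inline so the sketch is self-contained; the path and probe spec are made up):

	#define SEC(name) __attribute__((section(name), used))

	/*
	 * "exec=...;force=yes;" is consumed key=value by
	 * do_prog_config(); the remainder ("mywrite=write") is
	 * handed to parse_perf_probe_command() as the probe
	 * definition itself.
	 */
	SEC("exec=/usr/lib64/libc.so.6;force=yes;mywrite=write")
	int bpf_func__mywrite(void *ctx)
	{
		return 0;
	}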
+static int
config_bpf_program(struct bpf_program *prog)
{
struct perf_probe_event *pev = NULL;
@@ -117,6 +293,10 @@ config_bpf_program(struct bpf_program *prog)
const char *config_str;
int err;
+ /* Initialize per-program probing setting */
+ probe_conf.no_inlines = false;
+ probe_conf.force_add = false;
+
config_str = bpf_program__title(prog, false);
if (IS_ERR(config_str)) {
pr_debug("bpf: unable to get title for program\n");
@@ -131,13 +311,9 @@ config_bpf_program(struct bpf_program *prog)
pev = &priv->pev;
pr_debug("bpf: config program '%s'\n", config_str);
- err = parse_perf_probe_command(config_str, pev);
- if (err < 0) {
- pr_debug("bpf: '%s' is not a valid config string\n",
- config_str);
- err = -BPF_LOADER_ERRNO__CONFIG;
+ err = parse_prog_config(config_str, pev);
+ if (err)
goto errout;
- }
if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) {
pr_debug("bpf: '%s': group for event is set and not '%s'.\n",
@@ -197,6 +373,220 @@ static int bpf__prepare_probe(void)
return err;
}
+static int
+preproc_gen_prologue(struct bpf_program *prog, int n,
+ struct bpf_insn *orig_insns, int orig_insns_cnt,
+ struct bpf_prog_prep_result *res)
+{
+ struct probe_trace_event *tev;
+ struct perf_probe_event *pev;
+ struct bpf_prog_priv *priv;
+ struct bpf_insn *buf;
+ size_t prologue_cnt = 0;
+ int i, err;
+
+ err = bpf_program__get_private(prog, (void **)&priv);
+ if (err || !priv)
+ goto errout;
+
+ pev = &priv->pev;
+
+ if (n < 0 || n >= priv->nr_types)
+ goto errout;
+
+	/* Find a tev that belongs to that type */
+ for (i = 0; i < pev->ntevs; i++) {
+ if (priv->type_mapping[i] == n)
+ break;
+ }
+
+ if (i >= pev->ntevs) {
+ pr_debug("Internal error: prologue type %d not found\n", n);
+ return -BPF_LOADER_ERRNO__PROLOGUE;
+ }
+
+ tev = &pev->tevs[i];
+
+ buf = priv->insns_buf;
+ err = bpf__gen_prologue(tev->args, tev->nargs,
+ buf, &prologue_cnt,
+ BPF_MAXINSNS - orig_insns_cnt);
+ if (err) {
+ const char *title;
+
+ title = bpf_program__title(prog, false);
+ if (!title)
+ title = "[unknown]";
+
+ pr_debug("Failed to generate prologue for program %s\n",
+ title);
+ return err;
+ }
+
+ memcpy(&buf[prologue_cnt], orig_insns,
+ sizeof(struct bpf_insn) * orig_insns_cnt);
+
+ res->new_insn_ptr = buf;
+ res->new_insn_cnt = prologue_cnt + orig_insns_cnt;
+ res->pfd = NULL;
+ return 0;
+
+errout:
+ pr_debug("Internal error in preproc_gen_prologue\n");
+ return -BPF_LOADER_ERRNO__PROLOGUE;
+}
+
+/*
+ * compare_tev_args is reflexive, transitive and antisymmetric.
+ * I can prove it but this margin is too narrow to contain the proof.
+ */
+static int compare_tev_args(const void *ptev1, const void *ptev2)
+{
+ int i, ret;
+ const struct probe_trace_event *tev1 =
+ *(const struct probe_trace_event **)ptev1;
+ const struct probe_trace_event *tev2 =
+ *(const struct probe_trace_event **)ptev2;
+
+ ret = tev2->nargs - tev1->nargs;
+ if (ret)
+ return ret;
+
+ for (i = 0; i < tev1->nargs; i++) {
+ struct probe_trace_arg *arg1, *arg2;
+ struct probe_trace_arg_ref *ref1, *ref2;
+
+ arg1 = &tev1->args[i];
+ arg2 = &tev2->args[i];
+
+ ret = strcmp(arg1->value, arg2->value);
+ if (ret)
+ return ret;
+
+ ref1 = arg1->ref;
+ ref2 = arg2->ref;
+
+ while (ref1 && ref2) {
+ ret = ref2->offset - ref1->offset;
+ if (ret)
+ return ret;
+
+ ref1 = ref1->next;
+ ref2 = ref2->next;
+ }
+
+ if (ref1 || ref2)
+ return ref2 ? 1 : -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Assign a type number to each tev in a pev.
+ * mapping is an array with the same number of slots as there are tevs
+ * in that pev. nr_types will be set to the number of distinct types.
+ */
+static int map_prologue(struct perf_probe_event *pev, int *mapping,
+ int *nr_types)
+{
+ int i, type = 0;
+ struct probe_trace_event **ptevs;
+
+ size_t array_sz = sizeof(*ptevs) * pev->ntevs;
+
+ ptevs = malloc(array_sz);
+ if (!ptevs) {
+ pr_debug("No ehough memory: alloc ptevs failed\n");
+ return -ENOMEM;
+ }
+
+ pr_debug("In map_prologue, ntevs=%d\n", pev->ntevs);
+ for (i = 0; i < pev->ntevs; i++)
+ ptevs[i] = &pev->tevs[i];
+
+ qsort(ptevs, pev->ntevs, sizeof(*ptevs),
+ compare_tev_args);
+
+ for (i = 0; i < pev->ntevs; i++) {
+ int n;
+
+ n = ptevs[i] - pev->tevs;
+ if (i == 0) {
+ mapping[n] = type;
+ pr_debug("mapping[%d]=%d\n", n, type);
+ continue;
+ }
+
+ if (compare_tev_args(ptevs + i, ptevs + i - 1) == 0)
+ mapping[n] = type;
+ else
+ mapping[n] = ++type;
+
+ pr_debug("mapping[%d]=%d\n", n, mapping[n]);
+ }
+ free(ptevs);
+ *nr_types = type + 1;
+
+ return 0;
+}
+
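A worked example with made-up fetch arguments:

	/*
	 * Hypothetical input, one argument list per tev:
	 *   tevs[0]: { "%ax" }
	 *   tevs[1]: { "%bx", "+8(%bx)" }
	 *   tevs[2]: { "%ax" }
	 *
	 * compare_tev_args() orders by descending nargs first, so
	 * the sorted order is tevs[1], tevs[0], tevs[2]; walking it:
	 *   mapping[1] = 0    first entry opens type 0
	 *   mapping[0] = 1    args differ from tevs[1]
	 *   mapping[2] = 1    args equal to tevs[0]
	 * and *nr_types = 2, so only two distinct prologues are
	 * generated for the three probe points.
	 */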
+static int hook_load_preprocessor(struct bpf_program *prog)
+{
+ struct perf_probe_event *pev;
+ struct bpf_prog_priv *priv;
+ bool need_prologue = false;
+ int err, i;
+
+ err = bpf_program__get_private(prog, (void **)&priv);
+ if (err || !priv) {
+ pr_debug("Internal error when hook preprocessor\n");
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ pev = &priv->pev;
+ for (i = 0; i < pev->ntevs; i++) {
+ struct probe_trace_event *tev = &pev->tevs[i];
+
+ if (tev->nargs > 0) {
+ need_prologue = true;
+ break;
+ }
+ }
+
+ /*
+	 * Since none of the tevs has an argument, there is no need
+	 * to generate a prologue.
+ */
+ if (!need_prologue) {
+ priv->need_prologue = false;
+ return 0;
+ }
+
+ priv->need_prologue = true;
+ priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS);
+ if (!priv->insns_buf) {
+ pr_debug("No enough memory: alloc insns_buf failed\n");
+ return -ENOMEM;
+ }
+
+ priv->type_mapping = malloc(sizeof(int) * pev->ntevs);
+ if (!priv->type_mapping) {
+ pr_debug("No enough memory: alloc type_mapping failed\n");
+ return -ENOMEM;
+ }
+ memset(priv->type_mapping, -1,
+ sizeof(int) * pev->ntevs);
+
+ err = map_prologue(pev, priv->type_mapping, &priv->nr_types);
+ if (err)
+ return err;
+
+ err = bpf_program__set_prep(prog, priv->nr_types,
+ preproc_gen_prologue);
+ return err;
+}
+
int bpf__probe(struct bpf_object *obj)
{
int err = 0;
@@ -231,6 +621,18 @@ int bpf__probe(struct bpf_object *obj)
pr_debug("bpf_probe: failed to apply perf probe events");
goto out;
}
+
+ /*
+		 * After probing, consider the prologue, which
+		 * adds an argument fetcher to BPF programs.
+		 *
+		 * hook_load_preprocessor() hooks a pre-processor
+		 * to the bpf_program, letting it generate the
+		 * prologue dynamically during loading.
+ */
+ err = hook_load_preprocessor(prog);
+ if (err)
+ goto out;
}
out:
return err < 0 ? err : 0;
@@ -314,7 +716,14 @@ int bpf__foreach_tev(struct bpf_object *obj,
for (i = 0; i < pev->ntevs; i++) {
tev = &pev->tevs[i];
- fd = bpf_program__fd(prog);
+ if (priv->need_prologue) {
+ int type = priv->type_mapping[i];
+
+ fd = bpf_program__nth_fd(prog, type);
+ } else {
+ fd = bpf_program__fd(prog);
+ }
+
if (fd < 0) {
pr_debug("bpf: failed to get file descriptor\n");
return fd;
@@ -340,6 +749,10 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = {
[ERRCODE_OFFSET(EVENTNAME)] = "No event name found in config string",
[ERRCODE_OFFSET(INTERNAL)] = "BPF loader internal error",
[ERRCODE_OFFSET(COMPILE)] = "Error when compiling BPF scriptlet",
+ [ERRCODE_OFFSET(PROGCONF_TERM)] = "Invalid program config term in config string",
+ [ERRCODE_OFFSET(PROLOGUE)] = "Failed to generate prologue",
+ [ERRCODE_OFFSET(PROLOGUE2BIG)] = "Prologue too big for program",
+ [ERRCODE_OFFSET(PROLOGUEOOB)] = "Offset out of bound for prologue",
};
static int
@@ -420,7 +833,11 @@ int bpf__strerror_probe(struct bpf_object *obj __maybe_unused,
int err, char *buf, size_t size)
{
bpf__strerror_head(err, buf, size);
- bpf__strerror_entry(EEXIST, "Probe point exist. Try use 'perf probe -d \"*\"'");
+ case BPF_LOADER_ERRNO__PROGCONF_TERM: {
+ scnprintf(buf, size, "%s (add -v to see detail)", emsg);
+ break;
+ }
+ bpf__strerror_entry(EEXIST, "Probe point exist. Try 'perf probe -d \"*\"' and set 'force=yes'");
bpf__strerror_entry(EACCES, "You need to be root");
bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0");
bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file");
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index 9caf3ae4acf3..6fdc0457e2b6 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -20,6 +20,10 @@ enum bpf_loader_errno {
BPF_LOADER_ERRNO__EVENTNAME, /* Event name is missing */
BPF_LOADER_ERRNO__INTERNAL, /* BPF loader internal error */
BPF_LOADER_ERRNO__COMPILE, /* Error when compiling BPF scriptlet */
+ BPF_LOADER_ERRNO__PROGCONF_TERM,/* Invalid program config term in config string */
+ BPF_LOADER_ERRNO__PROLOGUE, /* Failed to generate prologue */
+ BPF_LOADER_ERRNO__PROLOGUE2BIG, /* Prologue too big for program */
+ BPF_LOADER_ERRNO__PROLOGUEOOB, /* Offset out of bound for prologue */
__BPF_LOADER_ERRNO__END,
};
diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c
new file mode 100644
index 000000000000..6cdbee119ceb
--- /dev/null
+++ b/tools/perf/util/bpf-prologue.c
@@ -0,0 +1,455 @@
+/*
+ * bpf-prologue.c
+ *
+ * Copyright (C) 2015 He Kuang <hekuang@huawei.com>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ */
+
+#include <bpf/libbpf.h>
+#include "perf.h"
+#include "debug.h"
+#include "bpf-loader.h"
+#include "bpf-prologue.h"
+#include "probe-finder.h"
+#include <dwarf-regs.h>
+#include <linux/filter.h>
+
+#define BPF_REG_SIZE 8
+
+#define JMP_TO_ERROR_CODE -1
+#define JMP_TO_SUCCESS_CODE -2
+#define JMP_TO_USER_CODE -3
+
+struct bpf_insn_pos {
+ struct bpf_insn *begin;
+ struct bpf_insn *end;
+ struct bpf_insn *pos;
+};
+
+static inline int
+pos_get_cnt(struct bpf_insn_pos *pos)
+{
+ return pos->pos - pos->begin;
+}
+
+static int
+append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos)
+{
+ if (!pos->pos)
+ return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+
+ if (pos->pos + 1 >= pos->end) {
+ pr_err("bpf prologue: prologue too long\n");
+ pos->pos = NULL;
+ return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+ }
+
+ *(pos->pos)++ = new_insn;
+ return 0;
+}
+
+static int
+check_pos(struct bpf_insn_pos *pos)
+{
+ if (!pos->pos || pos->pos >= pos->end)
+ return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+ return 0;
+}
+
+/* Give it a shorter name */
+#define ins(i, p) append_insn((i), (p))
+
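Note the error-handling idiom this enables: append_insn() latches an overflow by setting pos->pos to NULL, so the gen_*() helpers below can emit unchecked ins() chains and report once at the end, along these lines (a sketch mirroring gen_prologue_slowpath()):

	ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos);
	ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), pos);
	/* a single check covers every ins() above */
	return check_pos(pos);	/* 0 or -BPF_LOADER_ERRNO__PROLOGUE2BIG */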
+/*
+ * Give a register name (in 'reg'), generate instruction to
+ * load register into an eBPF register rd:
+ * 'ldd target_reg, offset(ctx_reg)', where:
+ * ctx_reg is pre initialized to pointer of 'struct pt_regs'.
+ */
+static int
+gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg,
+ const char *reg, int target_reg)
+{
+ int offset = regs_query_register_offset(reg);
+
+ if (offset < 0) {
+ pr_err("bpf: prologue: failed to get register %s\n",
+ reg);
+ return offset;
+ }
+ ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, offset), pos);
+
+ return check_pos(pos);
+}
+
+/*
+ * Generate a BPF_FUNC_probe_read function call.
+ *
+ * src_base_addr_reg is a register holding base address,
+ * dst_addr_reg is a register holding dest address (on stack),
+ * result is:
+ *
+ * *[dst_addr_reg] = *([src_base_addr_reg] + offset)
+ *
+ * Arguments of BPF_FUNC_probe_read:
+ * ARG1: ptr to stack (dest)
+ * ARG2: size (8)
+ * ARG3: unsafe ptr (src)
+ */
+static int
+gen_read_mem(struct bpf_insn_pos *pos,
+ int src_base_addr_reg,
+ int dst_addr_reg,
+ long offset)
+{
+ /* mov arg3, src_base_addr_reg */
+ if (src_base_addr_reg != BPF_REG_ARG3)
+ ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos);
+ /* add arg3, #offset */
+ if (offset)
+ ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos);
+
+ /* mov arg2, #reg_size */
+ ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos);
+
+ /* mov arg1, dst_addr_reg */
+ if (dst_addr_reg != BPF_REG_ARG1)
+ ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos);
+
+ /* Call probe_read */
+ ins(BPF_EMIT_CALL(BPF_FUNC_probe_read), pos);
+ /*
+	 * Error processing: if the read fails, jump to the error
+	 * code; the offset will be relocated later, and its target
+	 * should be the start of the error processing code.
+ */
+ ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE),
+ pos);
+
+ return check_pos(pos);
+}
+
+/*
+ * Each arg should be a bare register. Fetch and save them into the
+ * argument registers (r3 - r5).
+ *
+ * BPF_REG_1 should have been initialized with pointer to
+ * 'struct pt_regs'.
+ */
+static int
+gen_prologue_fastpath(struct bpf_insn_pos *pos,
+ struct probe_trace_arg *args, int nargs)
+{
+ int i, err = 0;
+
+ for (i = 0; i < nargs; i++) {
+ err = gen_ldx_reg_from_ctx(pos, BPF_REG_1, args[i].value,
+ BPF_PROLOGUE_START_ARG_REG + i);
+ if (err)
+ goto errout;
+ }
+
+ return check_pos(pos);
+errout:
+ return err;
+}
+
+/*
+ * Slow path:
+ * At least one argument has the form of 'offset($rx)'.
+ *
+ * The following code first stores them onto the stack, then loads
+ * all of them into r2 - r5.
+ * Before the final loading, the stack layout should be:
+ *
+ * low address
+ * BPF_REG_FP - 24 ARG3
+ * BPF_REG_FP - 16 ARG2
+ * BPF_REG_FP - 8 ARG1
+ * BPF_REG_FP
+ * high address
+ *
+ * For each argument (described as: offn(...off2(off1(reg)))),
+ * generates following code:
+ *
+ * r7 <- fp
+ * r7 <- r7 - stack_offset // Ideally r7 would be set from fp once,
+ *                         // before generating the args. However,
+ *                         // eBPF only regards a register as a
+ *                         // stack pointer if it is derived
+ *                         // directly from fp, not from another
+ *                         // derived pointer, so r7 has to be
+ *                         // reset from fp for each variable.
+ * r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx()
+ * (r7) <- r3 // skip following instructions for bare reg
+ * r3 <- r3 + off1 . // skip if off1 == 0
+ * r2 <- 8 \
+ * r1 <- r7 |-> generated by gen_read_mem()
+ * call probe_read /
+ * jnei r0, 0, err ./
+ * r3 <- (r7)
+ * r3 <- r3 + off2 . // skip if off2 == 0
+ * r2 <- 8 \ // r2 may be broken by probe_read, so set again
+ * r1 <- r7 |-> generated by gen_read_mem()
+ * call probe_read /
+ * jnei r0, 0, err ./
+ * ...
+ */
+static int
+gen_prologue_slowpath(struct bpf_insn_pos *pos,
+ struct probe_trace_arg *args, int nargs)
+{
+ int err, i;
+
+ for (i = 0; i < nargs; i++) {
+ struct probe_trace_arg *arg = &args[i];
+ const char *reg = arg->value;
+ struct probe_trace_arg_ref *ref = NULL;
+ int stack_offset = (i + 1) * -8;
+
+ pr_debug("prologue: fetch arg %d, base reg is %s\n",
+ i, reg);
+
+ /* value of base register is stored into ARG3 */
+ err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg,
+ BPF_REG_ARG3);
+ if (err) {
+ pr_err("prologue: failed to get offset of register %s\n",
+ reg);
+ goto errout;
+ }
+
+ /* Make r7 the stack pointer. */
+ ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos);
+ /* r7 += -8 */
+ ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos);
+ /*
+		 * Store r3 (base register) onto the stack to ensure
+		 * fp[offset] is set. fp is the only base register the
+		 * verifier accepts when storing into the stack, so r7
+		 * must not be used as the base register here.
+ */
+ ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3,
+ stack_offset), pos);
+
+ ref = arg->ref;
+ while (ref) {
+ pr_debug("prologue: arg %d: offset %ld\n",
+ i, ref->offset);
+ err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7,
+ ref->offset);
+ if (err) {
+ pr_err("prologue: failed to generate probe_read function call\n");
+ goto errout;
+ }
+
+ ref = ref->next;
+ /*
+ * Load previous result into ARG3. Use
+ * BPF_REG_FP instead of r7 because verifier
+ * allows FP based addressing only.
+ */
+ if (ref)
+ ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3,
+ BPF_REG_FP, stack_offset), pos);
+ }
+ }
+
+ /* Final pass: read to registers */
+ for (i = 0; i < nargs; i++)
+ ins(BPF_LDX_MEM(BPF_DW, BPF_PROLOGUE_START_ARG_REG + i,
+ BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos);
+
+ ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos);
+
+ return check_pos(pos);
+errout:
+ return err;
+}
+
+static int
+prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code,
+ struct bpf_insn *success_code, struct bpf_insn *user_code)
+{
+ struct bpf_insn *insn;
+
+ if (check_pos(pos))
+ return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+
+ for (insn = pos->begin; insn < pos->pos; insn++) {
+ struct bpf_insn *target;
+ u8 class = BPF_CLASS(insn->code);
+ u8 opcode;
+
+ if (class != BPF_JMP)
+ continue;
+ opcode = BPF_OP(insn->code);
+ if (opcode == BPF_CALL)
+ continue;
+
+ switch (insn->off) {
+ case JMP_TO_ERROR_CODE:
+ target = error_code;
+ break;
+ case JMP_TO_SUCCESS_CODE:
+ target = success_code;
+ break;
+ case JMP_TO_USER_CODE:
+ target = user_code;
+ break;
+ default:
+ pr_err("bpf prologue: internal error: relocation failed\n");
+ return -BPF_LOADER_ERRNO__PROLOGUE;
+ }
+
+ insn->off = target - (insn + 1);
+ }
+ return 0;
+}
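eBPF jump offsets are counted relative to the instruction following the jump, hence the (insn + 1) term above. A minimal fragment (prog[] is a hypothetical buffer, not part of this patch):

    #include <linux/filter.h>

    struct bpf_insn prog[32];
    struct bpf_insn *jmp = &prog[10], *target = &prog[20];

    /* Land on prog[20]: skip the nine instructions after the jump. */
    jmp->off = target - (jmp + 1);	/* == 9 */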
+
+int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
+ struct bpf_insn *new_prog, size_t *new_cnt,
+ size_t cnt_space)
+{
+ struct bpf_insn *success_code = NULL;
+ struct bpf_insn *error_code = NULL;
+ struct bpf_insn *user_code = NULL;
+ struct bpf_insn_pos pos;
+ bool fastpath = true;
+ int err = 0, i;
+
+ if (!new_prog || !new_cnt)
+ return -EINVAL;
+
+ if (cnt_space > BPF_MAXINSNS)
+ cnt_space = BPF_MAXINSNS;
+
+ pos.begin = new_prog;
+ pos.end = new_prog + cnt_space;
+ pos.pos = new_prog;
+
+ if (!nargs) {
+ ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0),
+ &pos);
+
+ if (check_pos(&pos))
+ goto errout;
+
+ *new_cnt = pos_get_cnt(&pos);
+ return 0;
+ }
+
+ if (nargs > BPF_PROLOGUE_MAX_ARGS) {
+ pr_warning("bpf: prologue: %d arguments are dropped\n",
+ nargs - BPF_PROLOGUE_MAX_ARGS);
+ nargs = BPF_PROLOGUE_MAX_ARGS;
+ }
+
+ /* First pass: validation */
+ for (i = 0; i < nargs; i++) {
+ struct probe_trace_arg_ref *ref = args[i].ref;
+
+ if (args[i].value[0] == '@') {
+ /* TODO: fetch global variable */
+			pr_err("bpf: prologue: global %s%+ld not supported\n",
+ args[i].value, ref ? ref->offset : 0);
+ return -ENOTSUP;
+ }
+
+ while (ref) {
+			/* fastpath is true only if all args have ref == NULL */
+ fastpath = false;
+
+			/*
+			 * The instruction encodes its immediate value as
+			 * s32, while ref->offset is a long. On systems
+			 * where a long cannot fit in s32, refuse to
+			 * process if ref->offset is too large (or too
+			 * small).
+			 */
+#ifdef __LP64__
+#define OFFSET_MAX ((1LL << 31) - 1)
+#define OFFSET_MIN ((1LL << 31) * -1)
+ if (ref->offset > OFFSET_MAX ||
+ ref->offset < OFFSET_MIN) {
+ pr_err("bpf: prologue: offset out of bound: %ld\n",
+ ref->offset);
+ return -BPF_LOADER_ERRNO__PROLOGUEOOB;
+ }
+#endif
+ ref = ref->next;
+ }
+ }
+ pr_debug("prologue: pass validation\n");
+
+ if (fastpath) {
+ /* If all variables are registers... */
+ pr_debug("prologue: fast path\n");
+ err = gen_prologue_fastpath(&pos, args, nargs);
+ if (err)
+ goto errout;
+ } else {
+ pr_debug("prologue: slow path\n");
+
+ /* Initialization: move ctx to a callee saved register. */
+ ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos);
+
+ err = gen_prologue_slowpath(&pos, args, nargs);
+ if (err)
+ goto errout;
+ /*
+		 * start of ERROR_CODE (only the slow path needs error code)
+		 * mov r2 <- 1  // r2 is the error number
+		 * mov r3 <- 0  // r3, r4, ... must be touched or the
+		 *              // verifier would complain
+ * mov r4 <- 0
+ * ...
+ * goto usercode
+ */
+ error_code = pos.pos;
+ ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1),
+ &pos);
+
+ for (i = 0; i < nargs; i++)
+ ins(BPF_ALU64_IMM(BPF_MOV,
+ BPF_PROLOGUE_START_ARG_REG + i,
+ 0),
+ &pos);
+ ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE),
+ &pos);
+ }
+
+ /*
+ * start of SUCCESS_CODE:
+ * mov r2 <- 0
+ * goto usercode // skip
+ */
+ success_code = pos.pos;
+ ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos);
+
+ /*
+ * start of USER_CODE:
+ * Restore ctx to r1
+ */
+ user_code = pos.pos;
+ if (!fastpath) {
+ /*
+		 * Only the slow path needs ctx restored. In the fast
+		 * path, registers are loaded directly from r1.
+ */
+ ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos);
+ err = prologue_relocate(&pos, error_code, success_code,
+ user_code);
+ if (err)
+ goto errout;
+ }
+
+ err = check_pos(&pos);
+ if (err)
+ goto errout;
+
+ *new_cnt = pos_get_cnt(&pos);
+ return 0;
+errout:
+ return err;
+}
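A caller-side sketch (args, nargs, orig_insns and orig_cnt are hypothetical names): the prologue is generated into the front of a scratch buffer sized so that the original program still fits behind it, then the original instructions are appended.

    struct bpf_insn buf[BPF_MAXINSNS];
    size_t prologue_cnt = 0;
    int err;

    err = bpf__gen_prologue(args, nargs, buf, &prologue_cnt,
                            BPF_MAXINSNS - orig_cnt);
    if (!err)	/* original instructions follow the prologue */
            memcpy(buf + prologue_cnt, orig_insns,
                   orig_cnt * sizeof(struct bpf_insn));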
diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h
new file mode 100644
index 000000000000..d94cbea12899
--- /dev/null
+++ b/tools/perf/util/bpf-prologue.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2015, He Kuang <hekuang@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+#ifndef __BPF_PROLOGUE_H
+#define __BPF_PROLOGUE_H
+
+#include <linux/compiler.h>
+#include <linux/filter.h>
+#include "probe-event.h"
+
+#define BPF_PROLOGUE_MAX_ARGS 3
+#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3
+#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2
+
+#ifdef HAVE_BPF_PROLOGUE
+int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
+ struct bpf_insn *new_prog, size_t *new_cnt,
+ size_t cnt_space);
+#else
+static inline int
+bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused,
+ int nargs __maybe_unused,
+ struct bpf_insn *new_prog __maybe_unused,
+ size_t *new_cnt,
+ size_t cnt_space __maybe_unused)
+{
+ if (!new_cnt)
+ return -EINVAL;
+ *new_cnt = 0;
+ return -ENOTSUP;
+}
+#endif
+#endif /* __BPF_PROLOGUE_H */
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index d909459fb54c..6a7e273a514a 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -76,6 +76,7 @@ struct perf_tool build_id__mark_dso_hit_ops = {
.exit = perf_event__exit_del_thread,
.attr = perf_event__process_attr,
.build_id = perf_event__process_build_id,
+ .ordered_events = true,
};
int build_id__sprintf(const u8 *build_id, int len, char *bf)
@@ -90,7 +91,7 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf)
bid += 2;
}
- return raw - build_id;
+ return (bid - bf) + 1;
}
int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id)
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index c861373aaed3..07b5d63947b1 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -4,9 +4,12 @@
#include <stdbool.h>
#include "util.h"
#include "strbuf.h"
+#include <subcmd/pager.h>
#include "../perf.h"
#include "../ui/ui.h"
+#include <linux/string.h>
+
#define CMD_EXEC_PATH "--exec-path"
#define CMD_PERF_DIR "--perf-dir="
#define CMD_WORK_TREE "--work-tree="
@@ -18,6 +21,7 @@
#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf"
#define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR"
#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR"
+#define PERF_PAGER_ENVIRONMENT "PERF_PAGER"
typedef int (*config_fn_t)(const char *, const char *, void *);
extern int perf_default_config(const char *, const char *, void *);
@@ -28,11 +32,6 @@ extern int perf_config_bool(const char *, const char *);
extern int config_error_nonbool(const char *);
extern const char *perf_config_dirname(const char *, const char *);
-/* pager.c */
-extern void setup_pager(void);
-extern int pager_in_use(void);
-extern int pager_use_color;
-
char *alias_lookup(const char *alias);
int split_cmdline(char *cmdline, const char ***argv);
@@ -71,9 +70,4 @@ extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2
extern char *perf_pathdup(const char *fmt, ...)
__attribute__((format (printf, 1, 2)));
-#ifndef __UCLIBC__
-/* Matches the libc/libbsd function attribute so we declare this unconditionally: */
-extern size_t strlcpy(char *dest, const char *src, size_t size);
-#endif
-
#endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 735ad48e1858..53c43eb9489e 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -44,6 +44,10 @@ static int parse_callchain_mode(const char *value)
callchain_param.mode = CHAIN_GRAPH_REL;
return 0;
}
+ if (!strncmp(value, "folded", strlen(value))) {
+ callchain_param.mode = CHAIN_FOLDED;
+ return 0;
+ }
return -1;
}
@@ -79,6 +83,23 @@ static int parse_callchain_sort_key(const char *value)
return -1;
}
+static int parse_callchain_value(const char *value)
+{
+ if (!strncmp(value, "percent", strlen(value))) {
+ callchain_param.value = CCVAL_PERCENT;
+ return 0;
+ }
+ if (!strncmp(value, "period", strlen(value))) {
+ callchain_param.value = CCVAL_PERIOD;
+ return 0;
+ }
+ if (!strncmp(value, "count", strlen(value))) {
+ callchain_param.value = CCVAL_COUNT;
+ return 0;
+ }
+ return -1;
+}
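Because only strlen(value) bytes are compared, any prefix of a keyword is accepted, and the first keyword tried wins; for instance:

    #include <assert.h>
    #include <string.h>

    /* "per" matches "percent" (tried first), "peri" matches "period". */
    assert(strncmp("per", "percent", strlen("per")) == 0);
    assert(strncmp("peri", "period", strlen("peri")) == 0);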
+
static int
__parse_callchain_report_opt(const char *arg, bool allow_record_opt)
{
@@ -102,7 +123,8 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
if (!parse_callchain_mode(tok) ||
!parse_callchain_order(tok) ||
- !parse_callchain_sort_key(tok)) {
+ !parse_callchain_sort_key(tok) ||
+ !parse_callchain_value(tok)) {
/* parsing ok - move on to the next */
try_stack_size = false;
goto next;
@@ -218,6 +240,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
switch (mode) {
case CHAIN_FLAT:
+ case CHAIN_FOLDED:
if (rnode->hit < chain->hit)
p = &(*p)->rb_left;
else
@@ -267,6 +290,7 @@ static void
sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
u64 min_hit, struct callchain_param *param __maybe_unused)
{
+ *rb_root = RB_ROOT;
__sort_chain_flat(rb_root, &root->node, min_hit);
}
@@ -338,6 +362,7 @@ int callchain_register_param(struct callchain_param *param)
param->sort = sort_chain_graph_rel;
break;
case CHAIN_FLAT:
+ case CHAIN_FOLDED:
param->sort = sort_chain_flat;
break;
case CHAIN_NONE:
@@ -363,6 +388,7 @@ create_child(struct callchain_node *parent, bool inherit_children)
}
new->parent = parent;
INIT_LIST_HEAD(&new->val);
+ INIT_LIST_HEAD(&new->parent_val);
if (inherit_children) {
struct rb_node *n;
@@ -431,6 +457,8 @@ add_child(struct callchain_node *parent,
new->children_hit = 0;
new->hit = period;
+ new->children_count = 0;
+ new->count = 1;
return new;
}
@@ -478,6 +506,9 @@ split_add_child(struct callchain_node *parent,
parent->children_hit = callchain_cumul_hits(new);
new->val_nr = parent->val_nr - idx_local;
parent->val_nr = idx_local;
+ new->count = parent->count;
+ new->children_count = parent->children_count;
+ parent->children_count = callchain_cumul_counts(new);
/* create a new child for the new branch if any */
if (idx_total < cursor->nr) {
@@ -488,6 +519,8 @@ split_add_child(struct callchain_node *parent,
parent->hit = 0;
parent->children_hit += period;
+ parent->count = 0;
+ parent->children_count += 1;
node = callchain_cursor_current(cursor);
new = add_child(parent, cursor, period);
@@ -510,6 +543,7 @@ split_add_child(struct callchain_node *parent,
rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
} else {
parent->hit = period;
+ parent->count = 1;
}
}
@@ -556,6 +590,7 @@ append_chain_children(struct callchain_node *root,
inc_children_hit:
root->children_hit += period;
+ root->children_count++;
}
static int
@@ -608,6 +643,7 @@ append_chain(struct callchain_node *root,
/* we match 100% of the path, increment the hit */
if (matches == root->val_nr && cursor->pos == cursor->nr) {
root->hit += period;
+ root->count++;
return 0;
}
@@ -799,12 +835,72 @@ char *callchain_list__sym_name(struct callchain_list *cl,
return bf;
}
+char *callchain_node__scnprintf_value(struct callchain_node *node,
+ char *bf, size_t bfsize, u64 total)
+{
+ double percent = 0.0;
+ u64 period = callchain_cumul_hits(node);
+ unsigned count = callchain_cumul_counts(node);
+
+ if (callchain_param.mode == CHAIN_FOLDED) {
+ period = node->hit;
+ count = node->count;
+ }
+
+ switch (callchain_param.value) {
+ case CCVAL_PERIOD:
+ scnprintf(bf, bfsize, "%"PRIu64, period);
+ break;
+ case CCVAL_COUNT:
+ scnprintf(bf, bfsize, "%u", count);
+ break;
+ case CCVAL_PERCENT:
+ default:
+ if (total)
+ percent = period * 100.0 / total;
+ scnprintf(bf, bfsize, "%.2f%%", percent);
+ break;
+ }
+ return bf;
+}
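As a worked example, a node with cumulative period 500 out of a total of 2000 and cumulative count 3 renders in the three value modes as follows (a sketch):

    /*
     *   CCVAL_PERCENT -> "25.00%"
     *   CCVAL_PERIOD  -> "500"
     *   CCVAL_COUNT   -> "3"
     */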
+
+int callchain_node__fprintf_value(struct callchain_node *node,
+ FILE *fp, u64 total)
+{
+ double percent = 0.0;
+ u64 period = callchain_cumul_hits(node);
+ unsigned count = callchain_cumul_counts(node);
+
+ if (callchain_param.mode == CHAIN_FOLDED) {
+ period = node->hit;
+ count = node->count;
+ }
+
+ switch (callchain_param.value) {
+ case CCVAL_PERIOD:
+ return fprintf(fp, "%"PRIu64, period);
+ case CCVAL_COUNT:
+ return fprintf(fp, "%u", count);
+ case CCVAL_PERCENT:
+ default:
+ if (total)
+ percent = period * 100.0 / total;
+ return percent_color_fprintf(fp, "%.2f%%", percent);
+ }
+ return 0;
+}
+
static void free_callchain_node(struct callchain_node *node)
{
struct callchain_list *list, *tmp;
struct callchain_node *child;
struct rb_node *n;
+ list_for_each_entry_safe(list, tmp, &node->parent_val, list) {
+ list_del(&list->list);
+ free(list);
+ }
+
list_for_each_entry_safe(list, tmp, &node->val, list) {
list_del(&list->list);
free(list);
@@ -828,3 +924,69 @@ void free_callchain(struct callchain_root *root)
free_callchain_node(&root->node);
}
+
+static u64 decay_callchain_node(struct callchain_node *node)
+{
+ struct callchain_node *child;
+ struct rb_node *n;
+ u64 child_hits = 0;
+
+ n = rb_first(&node->rb_root_in);
+ while (n) {
+ child = container_of(n, struct callchain_node, rb_node_in);
+
+ child_hits += decay_callchain_node(child);
+ n = rb_next(n);
+ }
+
+ node->hit = (node->hit * 7) / 8;
+ node->children_hit = child_hits;
+
+ return node->hit;
+}
+
+void decay_callchain(struct callchain_root *root)
+{
+ if (!symbol_conf.use_callchain)
+ return;
+
+ decay_callchain_node(&root->node);
+}
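Each decay pass scales a node's hits by 7/8 in integer arithmetic, so old samples fade exponentially; a standalone illustration:

    unsigned long long hit = 1000;
    int i;

    /* (7/8)^8 ~= 0.34: after eight decay rounds, 1000 becomes 342. */
    for (i = 0; i < 8; i++)
            hit = (hit * 7) / 8;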
+
+int callchain_node__make_parent_list(struct callchain_node *node)
+{
+ struct callchain_node *parent = node->parent;
+ struct callchain_list *chain, *new;
+ LIST_HEAD(head);
+
+ while (parent) {
+ list_for_each_entry_reverse(chain, &parent->val, list) {
+ new = malloc(sizeof(*new));
+ if (new == NULL)
+ goto out;
+ *new = *chain;
+ new->has_children = false;
+ list_add_tail(&new->list, &head);
+ }
+ parent = parent->parent;
+ }
+
+ list_for_each_entry_safe_reverse(chain, new, &head, list)
+ list_move_tail(&chain->list, &node->parent_val);
+
+ if (!list_empty(&node->parent_val)) {
+ chain = list_first_entry(&node->parent_val, struct callchain_list, list);
+ chain->has_children = rb_prev(&node->rb_node) || rb_next(&node->rb_node);
+
+ chain = list_first_entry(&node->val, struct callchain_list, list);
+ chain->has_children = false;
+ }
+ return 0;
+
+out:
+ list_for_each_entry_safe(chain, new, &head, list) {
+ list_del(&chain->list);
+ free(chain);
+ }
+ return -ENOMEM;
+}
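The effect is to materialize the node's full ancestor chain on the node itself, so flat/folded printing can emit a complete stack from a single leaf node instead of walking parents at print time; a sketch:

    /*
     * Ancestry  main -> foo -> bar (leaf):
     * after callchain_node__make_parent_list(bar), bar->parent_val
     * holds copies of the entries contributed by foo and main, and
     * parent_val together with bar->val covers the whole chain.
     */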
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index fce8161e54db..18dd22269764 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -24,12 +24,13 @@
#define CALLCHAIN_RECORD_HELP CALLCHAIN_HELP RECORD_MODE_HELP RECORD_SIZE_HELP
#define CALLCHAIN_REPORT_HELP \
- HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|none)\n" \
+ HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|folded|none)\n" \
HELP_PAD "threshold:\tminimum call graph inclusion threshold (<percent>)\n" \
HELP_PAD "print_limit:\tmaximum number of call graph entry (<number>)\n" \
HELP_PAD "order:\t\tcall graph order (caller|callee)\n" \
HELP_PAD "sort_key:\tcall graph sort key (function|address)\n" \
- HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n"
+ HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n" \
+ HELP_PAD "value:\t\tcall graph value (percent|period|count)\n"
enum perf_call_graph_mode {
CALLCHAIN_NONE,
@@ -43,7 +44,8 @@ enum chain_mode {
CHAIN_NONE,
CHAIN_FLAT,
CHAIN_GRAPH_ABS,
- CHAIN_GRAPH_REL
+ CHAIN_GRAPH_REL,
+ CHAIN_FOLDED,
};
enum chain_order {
@@ -54,11 +56,14 @@ enum chain_order {
struct callchain_node {
struct callchain_node *parent;
struct list_head val;
+ struct list_head parent_val;
struct rb_node rb_node_in; /* to insert nodes in an rbtree */
struct rb_node rb_node; /* to sort nodes in an output tree */
struct rb_root rb_root_in; /* input tree of children */
struct rb_root rb_root; /* sorted output tree of children */
unsigned int val_nr;
+ unsigned int count;
+ unsigned int children_count;
u64 hit;
u64 children_hit;
};
@@ -78,6 +83,12 @@ enum chain_key {
CCKEY_ADDRESS
};
+enum chain_value {
+ CCVAL_PERCENT,
+ CCVAL_PERIOD,
+ CCVAL_COUNT,
+};
+
struct callchain_param {
bool enabled;
enum perf_call_graph_mode record_mode;
@@ -90,6 +101,7 @@ struct callchain_param {
bool order_set;
enum chain_key key;
bool branch_callstack;
+ enum chain_value value;
};
extern struct callchain_param callchain_param;
@@ -131,6 +143,7 @@ extern __thread struct callchain_cursor callchain_cursor;
static inline void callchain_init(struct callchain_root *root)
{
INIT_LIST_HEAD(&root->node.val);
+ INIT_LIST_HEAD(&root->node.parent_val);
root->node.parent = NULL;
root->node.hit = 0;
@@ -144,6 +157,11 @@ static inline u64 callchain_cumul_hits(struct callchain_node *node)
return node->hit + node->children_hit;
}
+static inline unsigned callchain_cumul_counts(struct callchain_node *node)
+{
+ return node->count + node->children_count;
+}
+
int callchain_register_param(struct callchain_param *param);
int callchain_append(struct callchain_root *root,
struct callchain_cursor *cursor,
@@ -229,7 +247,13 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
char *callchain_list__sym_name(struct callchain_list *cl,
char *bf, size_t bfsize, bool show_dso);
+char *callchain_node__scnprintf_value(struct callchain_node *node,
+ char *bf, size_t bfsize, u64 total);
+int callchain_node__fprintf_value(struct callchain_node *node,
+ FILE *fp, u64 total);
void free_callchain(struct callchain_root *root);
+void decay_callchain(struct callchain_root *root);
+int callchain_node__make_parent_list(struct callchain_node *node);
#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 32e12ecfe9c5..90aa1b46b2e5 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -1,6 +1,6 @@
#include "util.h"
#include "../perf.h"
-#include "parse-options.h"
+#include <subcmd/parse-options.h>
#include "evsel.h"
#include "cgroup.h"
#include "evlist.h"
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
index 9b9565416f90..e5fb88bab9e1 100644
--- a/tools/perf/util/color.c
+++ b/tools/perf/util/color.c
@@ -24,7 +24,7 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
auto_color:
if (stdout_is_tty < 0)
stdout_is_tty = isatty(1);
- if (stdout_is_tty || (pager_in_use() && pager_use_color)) {
+ if (stdout_is_tty || pager_in_use()) {
char *term = getenv("TERM");
if (term && strcmp(term, "dumb"))
return 1;
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 2e452ac1353d..d3e12e30e1d5 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -10,7 +10,7 @@
*/
#include "util.h"
#include "cache.h"
-#include "exec_cmd.h"
+#include <subcmd/exec-cmd.h>
#include "util/hist.h" /* perf_hist_config */
#include "util/llvm-utils.h" /* perf_llvm_config */
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 10af1e7524fb..fa935093a599 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -5,6 +5,7 @@
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
+#include <linux/bitmap.h>
#include "asm/bug.h"
static struct cpu_map *cpu_map__default_new(void)
@@ -179,6 +180,56 @@ out:
return cpus;
}
+static struct cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
+{
+ struct cpu_map *map;
+
+ map = cpu_map__empty_new(cpus->nr);
+ if (map) {
+ unsigned i;
+
+ for (i = 0; i < cpus->nr; i++) {
+			/*
+			 * Special treatment for -1, which is not a real cpu
+			 * number: map[i] must be initialized with (int) -1,
+			 * otherwise the u16 value would become 65535.
+			 */
+ if (cpus->cpu[i] == (u16) -1)
+ map->map[i] = -1;
+ else
+ map->map[i] = (int) cpus->cpu[i];
+ }
+ }
+
+ return map;
+}
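A standalone illustration of the cast pitfall the check above avoids (plain stdint types used for portability):

    #include <stdint.h>

    uint16_t raw = (uint16_t) -1;	/* stored in the event as 65535 */
    int wrong = raw;			/* 65535, not the intended -1   */
    int right = (raw == (uint16_t) -1) ? -1 : (int) raw;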
+
+static struct cpu_map *cpu_map__from_mask(struct cpu_map_mask *mask)
+{
+ struct cpu_map *map;
+ int nr, nbits = mask->nr * mask->long_size * BITS_PER_BYTE;
+
+ nr = bitmap_weight(mask->mask, nbits);
+
+ map = cpu_map__empty_new(nr);
+ if (map) {
+ int cpu, i = 0;
+
+ for_each_set_bit(cpu, mask->mask, nbits)
+ map->map[i++] = cpu;
+ }
+	return map;
+}
+
+struct cpu_map *cpu_map__new_data(struct cpu_map_data *data)
+{
+ if (data->type == PERF_CPU_MAP__CPUS)
+ return cpu_map__from_entries((struct cpu_map_entries *)data->data);
+ else
+ return cpu_map__from_mask((struct cpu_map_mask *)data->data);
+}
+
size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp)
{
int i;
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 85f7772457fa..71c41b9efabb 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -17,6 +17,7 @@ struct cpu_map {
struct cpu_map *cpu_map__new(const char *cpu_list);
struct cpu_map *cpu_map__empty_new(int nr);
struct cpu_map *cpu_map__dummy_new(void);
+struct cpu_map *cpu_map__new_data(struct cpu_map_data *data);
struct cpu_map *cpu_map__read(FILE *file);
size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
int cpu_map__get_socket_id(int cpu);
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 5bfc1198ab46..34cd1e4039d3 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -63,6 +63,7 @@ struct ctf_writer {
struct bt_ctf_field_type *s32;
struct bt_ctf_field_type *u32;
struct bt_ctf_field_type *string;
+ struct bt_ctf_field_type *u32_hex;
struct bt_ctf_field_type *u64_hex;
};
struct bt_ctf_field_type *array[6];
@@ -982,6 +983,7 @@ do { \
CREATE_INT_TYPE(cw->data.u64, 64, false, false);
CREATE_INT_TYPE(cw->data.s32, 32, true, false);
CREATE_INT_TYPE(cw->data.u32, 32, false, false);
+ CREATE_INT_TYPE(cw->data.u32_hex, 32, false, true);
CREATE_INT_TYPE(cw->data.u64_hex, 64, false, true);
cw->data.string = bt_ctf_field_type_string_create();
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 7c0c08386a1d..e8e9a9dbf5e3 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -933,6 +933,7 @@ static struct dso *__dso__findlink_by_longname(struct rb_root *root,
/* Add new node and rebalance tree */
rb_link_node(&dso->rb_node, parent, p);
rb_insert_color(&dso->rb_node, root);
+ dso->root = root;
}
return NULL;
}
@@ -945,15 +946,30 @@ static inline struct dso *__dso__find_by_longname(struct rb_root *root,
void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
{
+ struct rb_root *root = dso->root;
+
if (name == NULL)
return;
if (dso->long_name_allocated)
free((char *)dso->long_name);
+ if (root) {
+ rb_erase(&dso->rb_node, root);
+ /*
+ * __dso__findlink_by_longname() isn't guaranteed to add it
+ * back, so a clean removal is required here.
+ */
+ RB_CLEAR_NODE(&dso->rb_node);
+ dso->root = NULL;
+ }
+
dso->long_name = name;
dso->long_name_len = strlen(name);
dso->long_name_allocated = name_allocated;
+
+ if (root)
+ __dso__findlink_by_longname(root, dso, NULL);
}
void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated)
@@ -1046,6 +1062,7 @@ struct dso *dso__new(const char *name)
dso->kernel = DSO_TYPE_USER;
dso->needs_swap = DSO_SWAP__UNSET;
RB_CLEAR_NODE(&dso->rb_node);
+ dso->root = NULL;
INIT_LIST_HEAD(&dso->node);
INIT_LIST_HEAD(&dso->data.open_entry);
pthread_mutex_init(&dso->lock, NULL);
@@ -1226,6 +1243,8 @@ struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
if (dso != NULL) {
__dsos__add(dsos, dso);
dso__set_basename(dso);
+		/* Put the dso here because __dsos__add() already got it */
+ dso__put(dso);
}
return dso;
}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index fc8db9c764ac..45ec4d0a50ed 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -135,6 +135,7 @@ struct dso {
pthread_mutex_t lock;
struct list_head node;
struct rb_node rb_node; /* rbtree node sorted by long name */
+ struct rb_root *root; /* root of rbtree that rb_node is in */
struct rb_root symbols[MAP__NR_TYPES];
struct rb_root symbol_names[MAP__NR_TYPES];
struct {
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 6af4f7c36820..7dd5939dea2e 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -25,15 +25,6 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
{
int i;
- /*
- * If env->cmdline_argv has already been set, do not override it. This allows
- * a command to set the cmdline, parse args and then call another
- * builtin function that implements a command -- e.g, cmd_kvm calling
- * cmd_record.
- */
- if (env->cmdline_argv != NULL)
- return 0;
-
/* do not include NULL termination */
env->cmdline_argv = calloc(argc, sizeof(char *));
if (env->cmdline_argv == NULL)
diff --git a/tools/perf/util/environment.c b/tools/perf/util/environment.c
deleted file mode 100644
index 7405123692f1..000000000000
--- a/tools/perf/util/environment.c
+++ /dev/null
@@ -1,8 +0,0 @@
-/*
- * We put all the perf config variables in this same object
- * file, so that programs can link against the config parser
- * without having to link against all the rest of perf.
- */
-#include "cache.h"
-
-int pager_use_color = 1;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 8b10621b415c..cd61bb1f3917 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -10,6 +10,8 @@
#include "thread.h"
#include "thread_map.h"
#include "symbol/kallsyms.h"
+#include "asm/bug.h"
+#include "stat.h"
static const char *perf_event__names[] = {
[0] = "TOTAL",
@@ -37,6 +39,12 @@ static const char *perf_event__names[] = {
[PERF_RECORD_AUXTRACE_INFO] = "AUXTRACE_INFO",
[PERF_RECORD_AUXTRACE] = "AUXTRACE",
[PERF_RECORD_AUXTRACE_ERROR] = "AUXTRACE_ERROR",
+ [PERF_RECORD_THREAD_MAP] = "THREAD_MAP",
+ [PERF_RECORD_CPU_MAP] = "CPU_MAP",
+ [PERF_RECORD_STAT_CONFIG] = "STAT_CONFIG",
+ [PERF_RECORD_STAT] = "STAT",
+ [PERF_RECORD_STAT_ROUND] = "STAT_ROUND",
+ [PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE",
};
const char *perf_event__name(unsigned int id)
@@ -699,6 +707,274 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
return err;
}
+int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+ struct thread_map *threads,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ union perf_event *event;
+ int i, err, size;
+
+ size = sizeof(event->thread_map);
+ size += threads->nr * sizeof(event->thread_map.entries[0]);
+
+ event = zalloc(size);
+ if (!event)
+ return -ENOMEM;
+
+ event->header.type = PERF_RECORD_THREAD_MAP;
+ event->header.size = size;
+ event->thread_map.nr = threads->nr;
+
+ for (i = 0; i < threads->nr; i++) {
+ struct thread_map_event_entry *entry = &event->thread_map.entries[i];
+ char *comm = thread_map__comm(threads, i);
+
+ if (!comm)
+ comm = (char *) "";
+
+ entry->pid = thread_map__pid(threads, i);
+ strncpy((char *) &entry->comm, comm, sizeof(entry->comm));
+ }
+
+ err = process(tool, event, NULL, machine);
+
+ free(event);
+ return err;
+}
+
+static void synthesize_cpus(struct cpu_map_entries *cpus,
+ struct cpu_map *map)
+{
+ int i;
+
+ cpus->nr = map->nr;
+
+ for (i = 0; i < map->nr; i++)
+ cpus->cpu[i] = map->map[i];
+}
+
+static void synthesize_mask(struct cpu_map_mask *mask,
+ struct cpu_map *map, int max)
+{
+ int i;
+
+ mask->nr = BITS_TO_LONGS(max);
+ mask->long_size = sizeof(long);
+
+ for (i = 0; i < map->nr; i++)
+ set_bit(map->map[i], mask->mask);
+}
+
+static size_t cpus_size(struct cpu_map *map)
+{
+ return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16);
+}
+
+static size_t mask_size(struct cpu_map *map, int *max)
+{
+ int i;
+
+ *max = 0;
+
+ for (i = 0; i < map->nr; i++) {
+		/* bit position of the cpu is its number + 1 */
+ int bit = map->map[i] + 1;
+
+ if (bit > *max)
+ *max = bit;
+ }
+
+ return sizeof(struct cpu_map_mask) + BITS_TO_LONGS(*max) * sizeof(long);
+}
+
+void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max)
+{
+ size_t size_cpus, size_mask;
+ bool is_dummy = cpu_map__empty(map);
+
+ /*
+	 * Both array and mask data have a variable size based
+	 * on the number of cpus and their actual values.
+	 * The size of the 'struct cpu_map_data' is:
+	 *
+	 *   array = size of 'struct cpu_map_entries' +
+	 *           number of cpus * sizeof(u16)
+	 *
+	 *   mask  = size of 'struct cpu_map_mask' +
+	 *           maximum cpu bit converted to size of longs
+	 *
+	 * and finally + the size of 'struct cpu_map_data'.
+ */
+ size_cpus = cpus_size(map);
+ size_mask = mask_size(map, max);
+
+ if (is_dummy || (size_cpus < size_mask)) {
+ *size += size_cpus;
+ *type = PERF_CPU_MAP__CPUS;
+ } else {
+ *size += size_mask;
+ *type = PERF_CPU_MAP__MASK;
+ }
+
+ *size += sizeof(struct cpu_map_data);
+ return zalloc(*size);
+}
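Worked sizes for the two encodings, assuming sizeof(long) == 8 and following cpus_size()/mask_size() above (struct sizes include flexible-array alignment padding):

    /*
     *   cpus {0,1,2,3}: array = 2 + 4 * 2   = 10 bytes
     *                   mask  = 8 + 1 * 8   = 16 bytes -> array chosen
     *
     *   cpus 0..511:    array = 2 + 512 * 2 = 1026 bytes
     *                   mask  = 8 + 8 * 8   = 72 bytes -> mask chosen
     */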
+
+void cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map,
+ u16 type, int max)
+{
+ data->type = type;
+
+ switch (type) {
+ case PERF_CPU_MAP__CPUS:
+ synthesize_cpus((struct cpu_map_entries *) data->data, map);
+ break;
+ case PERF_CPU_MAP__MASK:
+		synthesize_mask((struct cpu_map_mask *) data->data, map, max);
+		break;
+	default:
+		break;
+	}
+}
+
+static struct cpu_map_event* cpu_map_event__new(struct cpu_map *map)
+{
+ size_t size = sizeof(struct cpu_map_event);
+ struct cpu_map_event *event;
+ int max;
+ u16 type;
+
+ event = cpu_map_data__alloc(map, &size, &type, &max);
+ if (!event)
+ return NULL;
+
+ event->header.type = PERF_RECORD_CPU_MAP;
+ event->header.size = size;
+ event->data.type = type;
+
+ cpu_map_data__synthesize(&event->data, map, type, max);
+ return event;
+}
+
+int perf_event__synthesize_cpu_map(struct perf_tool *tool,
+ struct cpu_map *map,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct cpu_map_event *event;
+ int err;
+
+ event = cpu_map_event__new(map);
+ if (!event)
+ return -ENOMEM;
+
+ err = process(tool, (union perf_event *) event, NULL, machine);
+
+ free(event);
+ return err;
+}
+
+int perf_event__synthesize_stat_config(struct perf_tool *tool,
+ struct perf_stat_config *config,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct stat_config_event *event;
+ int size, i = 0, err;
+
+ size = sizeof(*event);
+ size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0]));
+
+ event = zalloc(size);
+ if (!event)
+ return -ENOMEM;
+
+ event->header.type = PERF_RECORD_STAT_CONFIG;
+ event->header.size = size;
+ event->nr = PERF_STAT_CONFIG_TERM__MAX;
+
+#define ADD(__term, __val) \
+ event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term; \
+ event->data[i].val = __val; \
+ i++;
+
+ ADD(AGGR_MODE, config->aggr_mode)
+ ADD(INTERVAL, config->interval)
+ ADD(SCALE, config->scale)
+
+ WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX,
+ "stat config terms unbalanced\n");
+#undef ADD
+
+ err = process(tool, (union perf_event *) event, NULL, machine);
+
+ free(event);
+ return err;
+}
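The ADD() macro expands each term into a tag/value pair; for example ADD(AGGR_MODE, config->aggr_mode) expands to:

    event->data[i].tag = PERF_STAT_CONFIG_TERM__AGGR_MODE;
    event->data[i].val = config->aggr_mode;
    i++;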
+
+int perf_event__synthesize_stat(struct perf_tool *tool,
+ u32 cpu, u32 thread, u64 id,
+ struct perf_counts_values *count,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct stat_event event;
+
+ event.header.type = PERF_RECORD_STAT;
+ event.header.size = sizeof(event);
+ event.header.misc = 0;
+
+ event.id = id;
+ event.cpu = cpu;
+ event.thread = thread;
+ event.val = count->val;
+ event.ena = count->ena;
+ event.run = count->run;
+
+ return process(tool, (union perf_event *) &event, NULL, machine);
+}
+
+int perf_event__synthesize_stat_round(struct perf_tool *tool,
+ u64 evtime, u64 type,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct stat_round_event event;
+
+ event.header.type = PERF_RECORD_STAT_ROUND;
+ event.header.size = sizeof(event);
+ event.header.misc = 0;
+
+ event.time = evtime;
+ event.type = type;
+
+ return process(tool, (union perf_event *) &event, NULL, machine);
+}
+
+void perf_event__read_stat_config(struct perf_stat_config *config,
+ struct stat_config_event *event)
+{
+ unsigned i;
+
+ for (i = 0; i < event->nr; i++) {
+
+ switch (event->data[i].tag) {
+#define CASE(__term, __val) \
+ case PERF_STAT_CONFIG_TERM__##__term: \
+ config->__val = event->data[i].val; \
+ break;
+
+ CASE(AGGR_MODE, aggr_mode)
+ CASE(SCALE, scale)
+ CASE(INTERVAL, interval)
+#undef CASE
+ default:
+ pr_warning("unknown stat config term %" PRIu64 "\n",
+ event->data[i].tag);
+ }
+ }
+}
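This is the inverse of perf_event__synthesize_stat_config(): a received event round-trips back into a perf_stat_config, and unknown tags only warn, so newer writers stay readable by older readers. A usage fragment (event is assumed to be a received union perf_event):

    struct perf_stat_config config = { 0 };

    perf_event__read_stat_config(&config, &event->stat_config);
    /* config.aggr_mode, config.interval and config.scale now
     * match what the sender passed to the synthesizer. */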
+
size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
{
const char *s;
@@ -783,6 +1059,38 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp)
event->mmap2.filename);
}
+size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp)
+{
+ struct thread_map *threads = thread_map__new_event(&event->thread_map);
+ size_t ret;
+
+ ret = fprintf(fp, " nr: ");
+
+ if (threads)
+ ret += thread_map__fprintf(threads, fp);
+ else
+ ret += fprintf(fp, "failed to get threads from event\n");
+
+ thread_map__put(threads);
+ return ret;
+}
+
+size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp)
+{
+ struct cpu_map *cpus = cpu_map__new_data(&event->cpu_map.data);
+ size_t ret;
+
+ ret = fprintf(fp, " nr: ");
+
+ if (cpus)
+ ret += cpu_map__fprintf(cpus, fp);
+ else
+ ret += fprintf(fp, "failed to get cpumap from event\n");
+
+ cpu_map__put(cpus);
+ return ret;
+}
+
int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index a0dbcbd4f6d8..b7ffb7ee9971 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -226,6 +226,12 @@ enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_AUXTRACE_INFO = 70,
PERF_RECORD_AUXTRACE = 71,
PERF_RECORD_AUXTRACE_ERROR = 72,
+ PERF_RECORD_THREAD_MAP = 73,
+ PERF_RECORD_CPU_MAP = 74,
+ PERF_RECORD_STAT_CONFIG = 75,
+ PERF_RECORD_STAT = 76,
+ PERF_RECORD_STAT_ROUND = 77,
+ PERF_RECORD_EVENT_UPDATE = 78,
PERF_RECORD_HEADER_MAX
};
@@ -270,12 +276,61 @@ struct events_stats {
u32 nr_proc_map_timeout;
};
+enum {
+ PERF_CPU_MAP__CPUS = 0,
+ PERF_CPU_MAP__MASK = 1,
+};
+
+struct cpu_map_entries {
+ u16 nr;
+ u16 cpu[];
+};
+
+struct cpu_map_mask {
+ u16 nr;
+ u16 long_size;
+ unsigned long mask[];
+};
+
+struct cpu_map_data {
+ u16 type;
+ char data[];
+};
+
+struct cpu_map_event {
+ struct perf_event_header header;
+ struct cpu_map_data data;
+};
+
struct attr_event {
struct perf_event_header header;
struct perf_event_attr attr;
u64 id[];
};
+enum {
+ PERF_EVENT_UPDATE__UNIT = 0,
+ PERF_EVENT_UPDATE__SCALE = 1,
+ PERF_EVENT_UPDATE__NAME = 2,
+ PERF_EVENT_UPDATE__CPUS = 3,
+};
+
+struct event_update_event_cpus {
+ struct cpu_map_data cpus;
+};
+
+struct event_update_event_scale {
+ double scale;
+};
+
+struct event_update_event {
+ struct perf_event_header header;
+ u64 type;
+ u64 id;
+
+ char data[];
+};
+
#define MAX_EVENT_NAME 64
struct perf_trace_event_type {
@@ -356,6 +411,63 @@ struct context_switch_event {
u32 next_prev_tid;
};
+struct thread_map_event_entry {
+ u64 pid;
+ char comm[16];
+};
+
+struct thread_map_event {
+ struct perf_event_header header;
+ u64 nr;
+ struct thread_map_event_entry entries[];
+};
+
+enum {
+ PERF_STAT_CONFIG_TERM__AGGR_MODE = 0,
+ PERF_STAT_CONFIG_TERM__INTERVAL = 1,
+ PERF_STAT_CONFIG_TERM__SCALE = 2,
+ PERF_STAT_CONFIG_TERM__MAX = 3,
+};
+
+struct stat_config_event_entry {
+ u64 tag;
+ u64 val;
+};
+
+struct stat_config_event {
+ struct perf_event_header header;
+ u64 nr;
+ struct stat_config_event_entry data[];
+};
+
+struct stat_event {
+ struct perf_event_header header;
+
+ u64 id;
+ u32 cpu;
+ u32 thread;
+
+ union {
+ struct {
+ u64 val;
+ u64 ena;
+ u64 run;
+ };
+ u64 values[3];
+ };
+};
+
+enum {
+ PERF_STAT_ROUND_TYPE__INTERVAL = 0,
+ PERF_STAT_ROUND_TYPE__FINAL = 1,
+};
+
+struct stat_round_event {
+ struct perf_event_header header;
+ u64 type;
+ u64 time;
+};
+
union perf_event {
struct perf_event_header header;
struct mmap_event mmap;
@@ -368,6 +480,7 @@ union perf_event {
struct throttle_event throttle;
struct sample_event sample;
struct attr_event attr;
+ struct event_update_event event_update;
struct event_type_event event_type;
struct tracing_data_event tracing_data;
struct build_id_event build_id;
@@ -378,12 +491,20 @@ union perf_event {
struct aux_event aux;
struct itrace_start_event itrace_start;
struct context_switch_event context_switch;
+ struct thread_map_event thread_map;
+ struct cpu_map_event cpu_map;
+ struct stat_config_event stat_config;
+ struct stat_event stat;
+ struct stat_round_event stat_round;
};
void perf_event__print_totals(void);
struct perf_tool;
struct thread_map;
+struct cpu_map;
+struct perf_stat_config;
+struct perf_counts_values;
typedef int (*perf_event__handler_t)(struct perf_tool *tool,
union perf_event *event,
@@ -395,6 +516,14 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine, bool mmap_data,
unsigned int proc_map_timeout);
+int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+ struct thread_map *threads,
+ perf_event__handler_t process,
+ struct machine *machine);
+int perf_event__synthesize_cpu_map(struct perf_tool *tool,
+ struct cpu_map *cpus,
+ perf_event__handler_t process,
+ struct machine *machine);
int perf_event__synthesize_threads(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine, bool mmap_data,
@@ -402,7 +531,21 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine);
-
+int perf_event__synthesize_stat_config(struct perf_tool *tool,
+ struct perf_stat_config *config,
+ perf_event__handler_t process,
+ struct machine *machine);
+void perf_event__read_stat_config(struct perf_stat_config *config,
+ struct stat_config_event *event);
+int perf_event__synthesize_stat(struct perf_tool *tool,
+ u32 cpu, u32 thread, u64 id,
+ struct perf_counts_values *count,
+ perf_event__handler_t process,
+ struct machine *machine);
+int perf_event__synthesize_stat_round(struct perf_tool *tool,
+ u64 time, u64 type,
+ perf_event__handler_t process,
+ struct machine *machine);
int perf_event__synthesize_modules(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine);
@@ -499,9 +642,14 @@ size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, FILE *fp);
u64 kallsyms__get_function_start(const char *kallsyms_filename,
const char *symbol_name);
+void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max);
+void cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map,
+ u16 type, int max);
#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d1392194a9a9..d81f13de2476 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -18,7 +18,7 @@
#include <unistd.h>
#include "parse-events.h"
-#include "parse-options.h"
+#include <subcmd/parse-options.h>
#include <sys/mman.h>
@@ -68,6 +68,18 @@ struct perf_evlist *perf_evlist__new_default(void)
return evlist;
}
+struct perf_evlist *perf_evlist__new_dummy(void)
+{
+ struct perf_evlist *evlist = perf_evlist__new();
+
+ if (evlist && perf_evlist__add_dummy(evlist)) {
+ perf_evlist__delete(evlist);
+ evlist = NULL;
+ }
+
+ return evlist;
+}
+
/**
* perf_evlist__set_id_pos - set the positions of event ids.
* @evlist: selected event list
@@ -248,6 +260,22 @@ error:
return -ENOMEM;
}
+int perf_evlist__add_dummy(struct perf_evlist *evlist)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_DUMMY,
+ .size = sizeof(attr), /* to capture ABI version */
+ };
+ struct perf_evsel *evsel = perf_evsel__new(&attr);
+
+ if (evsel == NULL)
+ return -ENOMEM;
+
+ perf_evlist__add(evlist, evsel);
+ return 0;
+}
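A usage fragment for the new constructor: a dummy-only evlist gives a caller the evlist machinery without opening a real hardware or software counter.

    struct perf_evlist *evlist = perf_evlist__new_dummy();

    if (evlist == NULL)
            return -ENOMEM;	/* perf_evlist__add_dummy() failed */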
+
static int perf_evlist__add_attrs(struct perf_evlist *evlist,
struct perf_event_attr *attrs, size_t nr_attrs)
{
@@ -336,20 +364,12 @@ static int perf_evlist__nr_threads(struct perf_evlist *evlist,
void perf_evlist__disable(struct perf_evlist *evlist)
{
- int cpu, thread;
struct perf_evsel *pos;
- int nr_cpus = cpu_map__nr(evlist->cpus);
- int nr_threads;
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- evlist__for_each(evlist, pos) {
- if (!perf_evsel__is_group_leader(pos) || !pos->fd)
- continue;
- nr_threads = perf_evlist__nr_threads(evlist, pos);
- for (thread = 0; thread < nr_threads; thread++)
- ioctl(FD(pos, cpu, thread),
- PERF_EVENT_IOC_DISABLE, 0);
- }
+ evlist__for_each(evlist, pos) {
+ if (!perf_evsel__is_group_leader(pos) || !pos->fd)
+ continue;
+ perf_evsel__disable(pos);
}
evlist->enabled = false;
@@ -357,20 +377,12 @@ void perf_evlist__disable(struct perf_evlist *evlist)
void perf_evlist__enable(struct perf_evlist *evlist)
{
- int cpu, thread;
struct perf_evsel *pos;
- int nr_cpus = cpu_map__nr(evlist->cpus);
- int nr_threads;
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- evlist__for_each(evlist, pos) {
- if (!perf_evsel__is_group_leader(pos) || !pos->fd)
- continue;
- nr_threads = perf_evlist__nr_threads(evlist, pos);
- for (thread = 0; thread < nr_threads; thread++)
- ioctl(FD(pos, cpu, thread),
- PERF_EVENT_IOC_ENABLE, 0);
- }
+ evlist__for_each(evlist, pos) {
+ if (!perf_evsel__is_group_leader(pos) || !pos->fd)
+ continue;
+ perf_evsel__enable(pos);
}
evlist->enabled = true;
@@ -381,48 +393,6 @@ void perf_evlist__toggle_enable(struct perf_evlist *evlist)
(evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
}
-int perf_evlist__disable_event(struct perf_evlist *evlist,
- struct perf_evsel *evsel)
-{
- int cpu, thread, err;
- int nr_cpus = cpu_map__nr(evlist->cpus);
- int nr_threads = perf_evlist__nr_threads(evlist, evsel);
-
- if (!evsel->fd)
- return 0;
-
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- for (thread = 0; thread < nr_threads; thread++) {
- err = ioctl(FD(evsel, cpu, thread),
- PERF_EVENT_IOC_DISABLE, 0);
- if (err)
- return err;
- }
- }
- return 0;
-}
-
-int perf_evlist__enable_event(struct perf_evlist *evlist,
- struct perf_evsel *evsel)
-{
- int cpu, thread, err;
- int nr_cpus = cpu_map__nr(evlist->cpus);
- int nr_threads = perf_evlist__nr_threads(evlist, evsel);
-
- if (!evsel->fd)
- return -EINVAL;
-
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- for (thread = 0; thread < nr_threads; thread++) {
- err = ioctl(FD(evsel, cpu, thread),
- PERF_EVENT_IOC_ENABLE, 0);
- if (err)
- return err;
- }
- }
- return 0;
-}
-
static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
struct perf_evsel *evsel, int cpu)
{
@@ -550,9 +520,9 @@ void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
evsel->id[evsel->ids++] = id;
}
-static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
- struct perf_evsel *evsel,
- int cpu, int thread, int fd)
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+ struct perf_evsel *evsel,
+ int cpu, int thread, int fd)
{
u64 read_data[4] = { 0, };
int id_idx = 1; /* The first entry is the counter value */
@@ -1486,7 +1456,7 @@ int perf_evlist__open(struct perf_evlist *evlist)
perf_evlist__update_id_pos(evlist);
evlist__for_each(evlist, evsel) {
- err = perf_evsel__open(evsel, evlist->cpus, evlist->threads);
+ err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
if (err < 0)
goto out_err;
}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a459fe71b452..7c4d9a206776 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -67,6 +67,7 @@ struct perf_evsel_str_handler {
struct perf_evlist *perf_evlist__new(void);
struct perf_evlist *perf_evlist__new_default(void);
+struct perf_evlist *perf_evlist__new_dummy(void);
void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
struct thread_map *threads);
void perf_evlist__exit(struct perf_evlist *evlist);
@@ -81,6 +82,8 @@ int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
#define perf_evlist__add_default_attrs(evlist, array) \
__perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
+int perf_evlist__add_dummy(struct perf_evlist *evlist);
+
int perf_evlist__add_newtp(struct perf_evlist *evlist,
const char *sys, const char *name, void *handler);
@@ -97,6 +100,9 @@ perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
int cpu, int thread, u64 id);
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+ struct perf_evsel *evsel,
+ int cpu, int thread, int fd);
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
@@ -149,10 +155,6 @@ void perf_evlist__disable(struct perf_evlist *evlist);
void perf_evlist__enable(struct perf_evlist *evlist);
void perf_evlist__toggle_enable(struct perf_evlist *evlist);
-int perf_evlist__disable_event(struct perf_evlist *evlist,
- struct perf_evsel *evsel);
-int perf_evlist__enable_event(struct perf_evlist *evlist,
- struct perf_evsel *evsel);
int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
struct perf_evsel *evsel, int idx);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 397fb4ed3c97..cdbaf9b51e42 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -36,6 +36,7 @@ static struct {
bool cloexec;
bool clockid;
bool clockid_wrong;
+ bool lbr_flags;
} perf_missing_features;
static clockid_t clockid;
@@ -574,7 +575,9 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
} else {
perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
- PERF_SAMPLE_BRANCH_CALL_STACK;
+ PERF_SAMPLE_BRANCH_CALL_STACK |
+ PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_NO_FLAGS;
}
} else
pr_warning("Cannot use LBR callstack with branch stack. "
@@ -981,13 +984,26 @@ int perf_evsel__append_filter(struct perf_evsel *evsel,
return -1;
}
-int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads)
+int perf_evsel__enable(struct perf_evsel *evsel)
{
+ int nthreads = thread_map__nr(evsel->threads);
+ int ncpus = cpu_map__nr(evsel->cpus);
+
return perf_evsel__run_ioctl(evsel, ncpus, nthreads,
PERF_EVENT_IOC_ENABLE,
0);
}
+int perf_evsel__disable(struct perf_evsel *evsel)
+{
+ int nthreads = thread_map__nr(evsel->threads);
+ int ncpus = cpu_map__nr(evsel->cpus);
+
+ return perf_evsel__run_ioctl(evsel, ncpus, nthreads,
+ PERF_EVENT_IOC_DISABLE,
+ 0);
+}
+
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
{
if (ncpus == 0 || nthreads == 0)
@@ -1192,6 +1208,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
+ bit_name(WEIGHT),
{ .name = NULL, }
};
#undef bit_name
@@ -1323,6 +1340,9 @@ fallback_missing_features:
evsel->attr.mmap2 = 0;
if (perf_missing_features.exclude_guest)
evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
+ if (perf_missing_features.lbr_flags)
+ evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_NO_CYCLES);
retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->attr.sample_id_all = 0;
@@ -1441,6 +1461,12 @@ try_fallback:
} else if (!perf_missing_features.sample_id_all) {
perf_missing_features.sample_id_all = true;
goto retry_sample_id;
+ } else if (!perf_missing_features.lbr_flags &&
+ (evsel->attr.branch_sample_type &
+ (PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_NO_FLAGS))) {
+ perf_missing_features.lbr_flags = true;
+ goto fallback_missing_features;
}
out_close:
@@ -2272,6 +2298,29 @@ int perf_evsel__fprintf(struct perf_evsel *evsel,
printed += comma_fprintf(fp, &first, " %s=%" PRIu64,
term, (u64)evsel->attr.sample_freq);
}
+
+ if (details->trace_fields) {
+ struct format_field *field;
+
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+ printed += comma_fprintf(fp, &first, " (not a tracepoint)");
+ goto out;
+ }
+
+ field = evsel->tp_format->format.fields;
+ if (field == NULL) {
+ printed += comma_fprintf(fp, &first, " (no trace field)");
+ goto out;
+ }
+
+ printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name);
+
+ field = field->next;
+ while (field) {
+ printed += comma_fprintf(fp, &first, "%s", field->name);
+ field = field->next;
+ }
+ }
out:
fputc('\n', fp);
return ++printed;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 0e49bd742c63..8e75434bd01c 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -227,7 +227,8 @@ int perf_evsel__append_filter(struct perf_evsel *evsel,
const char *op, const char *filter);
int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
const char *filter);
-int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads);
+int perf_evsel__enable(struct perf_evsel *evsel);
+int perf_evsel__disable(struct perf_evsel *evsel);
int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
struct cpu_map *cpus);
@@ -368,6 +369,7 @@ struct perf_attr_details {
bool verbose;
bool event_group;
bool force;
+ bool trace_fields;
};
int perf_evsel__fprintf(struct perf_evsel *evsel,
diff --git a/tools/perf/util/exec_cmd.c b/tools/perf/util/exec_cmd.c
deleted file mode 100644
index 7adf4ad15d8f..000000000000
--- a/tools/perf/util/exec_cmd.c
+++ /dev/null
@@ -1,148 +0,0 @@
-#include "cache.h"
-#include "exec_cmd.h"
-#include "quote.h"
-
-#include <string.h>
-
-#define MAX_ARGS 32
-
-static const char *argv_exec_path;
-static const char *argv0_path;
-
-const char *system_path(const char *path)
-{
- static const char *prefix = PREFIX;
- struct strbuf d = STRBUF_INIT;
-
- if (is_absolute_path(path))
- return path;
-
- strbuf_addf(&d, "%s/%s", prefix, path);
- path = strbuf_detach(&d, NULL);
- return path;
-}
-
-const char *perf_extract_argv0_path(const char *argv0)
-{
- const char *slash;
-
- if (!argv0 || !*argv0)
- return NULL;
- slash = argv0 + strlen(argv0);
-
- while (argv0 <= slash && !is_dir_sep(*slash))
- slash--;
-
- if (slash >= argv0) {
- argv0_path = strndup(argv0, slash - argv0);
- return argv0_path ? slash + 1 : NULL;
- }
-
- return argv0;
-}
-
-void perf_set_argv_exec_path(const char *exec_path)
-{
- argv_exec_path = exec_path;
- /*
- * Propagate this setting to external programs.
- */
- setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1);
-}
-
-
-/* Returns the highest-priority, location to look for perf programs. */
-const char *perf_exec_path(void)
-{
- const char *env;
-
- if (argv_exec_path)
- return argv_exec_path;
-
- env = getenv(EXEC_PATH_ENVIRONMENT);
- if (env && *env) {
- return env;
- }
-
- return system_path(PERF_EXEC_PATH);
-}
-
-static void add_path(struct strbuf *out, const char *path)
-{
- if (path && *path) {
- if (is_absolute_path(path))
- strbuf_addstr(out, path);
- else
- strbuf_addstr(out, make_nonrelative_path(path));
-
- strbuf_addch(out, PATH_SEP);
- }
-}
-
-void setup_path(void)
-{
- const char *old_path = getenv("PATH");
- struct strbuf new_path = STRBUF_INIT;
-
- add_path(&new_path, perf_exec_path());
- add_path(&new_path, argv0_path);
-
- if (old_path)
- strbuf_addstr(&new_path, old_path);
- else
- strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin");
-
- setenv("PATH", new_path.buf, 1);
-
- strbuf_release(&new_path);
-}
-
-static const char **prepare_perf_cmd(const char **argv)
-{
- int argc;
- const char **nargv;
-
- for (argc = 0; argv[argc]; argc++)
- ; /* just counting */
- nargv = malloc(sizeof(*nargv) * (argc + 2));
-
- nargv[0] = "perf";
- for (argc = 0; argv[argc]; argc++)
- nargv[argc + 1] = argv[argc];
- nargv[argc + 1] = NULL;
- return nargv;
-}
-
-int execv_perf_cmd(const char **argv) {
- const char **nargv = prepare_perf_cmd(argv);
-
- /* execvp() can only ever return if it fails */
- execvp("perf", (char **)nargv);
-
- free(nargv);
- return -1;
-}
-
-
-int execl_perf_cmd(const char *cmd,...)
-{
- int argc;
- const char *argv[MAX_ARGS + 1];
- const char *arg;
- va_list param;
-
- va_start(param, cmd);
- argv[0] = cmd;
- argc = 1;
- while (argc < MAX_ARGS) {
- arg = argv[argc++] = va_arg(param, char *);
- if (!arg)
- break;
- }
- va_end(param);
- if (MAX_ARGS <= argc)
- return error("too many args to run %s", cmd);
-
- argv[argc] = NULL;
- return execv_perf_cmd(argv);
-}
diff --git a/tools/perf/util/exec_cmd.h b/tools/perf/util/exec_cmd.h
deleted file mode 100644
index bc4b915963f5..000000000000
--- a/tools/perf/util/exec_cmd.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef __PERF_EXEC_CMD_H
-#define __PERF_EXEC_CMD_H
-
-extern void perf_set_argv_exec_path(const char *exec_path);
-extern const char *perf_extract_argv0_path(const char *path);
-extern const char *perf_exec_path(void);
-extern void setup_path(void);
-extern int execv_perf_cmd(const char **argv); /* NULL terminated */
-extern int execl_perf_cmd(const char *cmd, ...);
-extern const char *system_path(const char *path);
-
-#endif /* __PERF_EXEC_CMD_H */
diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh
index 36a885d2cd22..0ac2037c970c 100755
--- a/tools/perf/util/generate-cmdlist.sh
+++ b/tools/perf/util/generate-cmdlist.sh
@@ -36,4 +36,19 @@ do
}' "Documentation/perf-$cmd.txt"
done
echo "#endif /* HAVE_LIBELF_SUPPORT */"
+
+echo "#ifdef HAVE_LIBAUDIT_SUPPORT"
+sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+ sed -n '
+ /^NAME/,/perf-'"$cmd"'/H
+ ${
+ x
+ s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/
+ p
+ }' "Documentation/perf-$cmd.txt"
+done
+echo "#endif /* HAVE_LIBAUDIT_SUPPORT */"
echo "};"
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 43838003c1a1..f50b7235ecb6 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -724,7 +724,7 @@ static int write_numa_topology(int fd, struct perf_header *h __maybe_unused,
done:
free(buf);
fclose(fp);
- free(node_map);
+ cpu_map__put(node_map);
return ret;
}
@@ -868,6 +868,13 @@ static int write_auxtrace(int fd, struct perf_header *h,
return err;
}
+static int write_stat(int fd __maybe_unused,
+ struct perf_header *h __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ return 0;
+}
+
static void print_hostname(struct perf_header *ph, int fd __maybe_unused,
FILE *fp)
{
@@ -1159,6 +1166,12 @@ static void print_auxtrace(struct perf_header *ph __maybe_unused,
fprintf(fp, "# contains AUX area data (e.g. instruction trace)\n");
}
+static void print_stat(struct perf_header *ph __maybe_unused,
+ int fd __maybe_unused, FILE *fp)
+{
+ fprintf(fp, "# contains stat data\n");
+}
+
static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
FILE *fp)
{
@@ -1948,6 +1961,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings),
FEAT_OPP(HEADER_GROUP_DESC, group_desc),
FEAT_OPP(HEADER_AUXTRACE, auxtrace),
+ FEAT_OPA(HEADER_STAT, stat),
};
struct header_print_data {
@@ -2686,6 +2700,152 @@ int perf_event__synthesize_attr(struct perf_tool *tool,
return err;
}
+static struct event_update_event *
+event_update_event__new(size_t size, u64 type, u64 id)
+{
+ struct event_update_event *ev;
+
+ size += sizeof(*ev);
+ size = PERF_ALIGN(size, sizeof(u64));
+
+ ev = zalloc(size);
+ if (ev) {
+ ev->header.type = PERF_RECORD_EVENT_UPDATE;
+ ev->header.size = (u16)size;
+ ev->type = type;
+ ev->id = id;
+ }
+ return ev;
+}
+
+int
+perf_event__synthesize_event_update_unit(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process)
+{
+ struct event_update_event *ev;
+ size_t size = strlen(evsel->unit);
+ int err;
+
+ ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->id[0]);
+ if (ev == NULL)
+ return -ENOMEM;
+
+ strncpy(ev->data, evsel->unit, size);
+ err = process(tool, (union perf_event *)ev, NULL, NULL);
+ free(ev);
+ return err;
+}
+
+int
+perf_event__synthesize_event_update_scale(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process)
+{
+ struct event_update_event *ev;
+ struct event_update_event_scale *ev_data;
+ int err;
+
+ ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->id[0]);
+ if (ev == NULL)
+ return -ENOMEM;
+
+ ev_data = (struct event_update_event_scale *) ev->data;
+ ev_data->scale = evsel->scale;
+ err = process(tool, (union perf_event*) ev, NULL, NULL);
+ free(ev);
+ return err;
+}
+
+int
+perf_event__synthesize_event_update_name(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process)
+{
+ struct event_update_event *ev;
+ size_t len = strlen(evsel->name);
+ int err;
+
+ ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->id[0]);
+ if (ev == NULL)
+ return -ENOMEM;
+
+ strncpy(ev->data, evsel->name, len);
+ err = process(tool, (union perf_event*) ev, NULL, NULL);
+ free(ev);
+ return err;
+}
+
+int
+perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process)
+{
+ size_t size = sizeof(struct event_update_event);
+ struct event_update_event *ev;
+ int max, err;
+ u16 type;
+
+ if (!evsel->own_cpus)
+ return 0;
+
+ ev = cpu_map_data__alloc(evsel->own_cpus, &size, &type, &max);
+ if (!ev)
+ return -ENOMEM;
+
+ ev->header.type = PERF_RECORD_EVENT_UPDATE;
+ ev->header.size = (u16)size;
+ ev->type = PERF_EVENT_UPDATE__CPUS;
+ ev->id = evsel->id[0];
+
+ cpu_map_data__synthesize((struct cpu_map_data *) ev->data,
+ evsel->own_cpus,
+ type, max);
+
+ err = process(tool, (union perf_event*) ev, NULL, NULL);
+ free(ev);
+ return err;
+}
+
+size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
+{
+ struct event_update_event *ev = &event->event_update;
+ struct event_update_event_scale *ev_scale;
+ struct event_update_event_cpus *ev_cpus;
+ struct cpu_map *map;
+ size_t ret;
+
+ ret = fprintf(fp, "\n... id: %" PRIu64 "\n", ev->id);
+
+ switch (ev->type) {
+ case PERF_EVENT_UPDATE__SCALE:
+ ev_scale = (struct event_update_event_scale *) ev->data;
+ ret += fprintf(fp, "... scale: %f\n", ev_scale->scale);
+ break;
+ case PERF_EVENT_UPDATE__UNIT:
+ ret += fprintf(fp, "... unit: %s\n", ev->data);
+ break;
+ case PERF_EVENT_UPDATE__NAME:
+ ret += fprintf(fp, "... name: %s\n", ev->data);
+ break;
+ case PERF_EVENT_UPDATE__CPUS:
+ ev_cpus = (struct event_update_event_cpus *) ev->data;
+ ret += fprintf(fp, "... ");
+
+ map = cpu_map__new_data(&ev_cpus->cpus);
+ if (map)
+ ret += cpu_map__fprintf(map, fp);
+ else
+ ret += fprintf(fp, "failed to get cpus\n");
+ break;
+ default:
+ ret += fprintf(fp, "... unknown type\n");
+ break;
+ }
+
+ return ret;
+}
+
int perf_event__synthesize_attrs(struct perf_tool *tool,
struct perf_session *session,
perf_event__handler_t process)
@@ -2745,6 +2905,51 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
return 0;
}
+int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_evlist **pevlist)
+{
+ struct event_update_event *ev = &event->event_update;
+ struct event_update_event_scale *ev_scale;
+ struct event_update_event_cpus *ev_cpus;
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel;
+ struct cpu_map *map;
+
+ if (!pevlist || *pevlist == NULL)
+ return -EINVAL;
+
+ evlist = *pevlist;
+
+ evsel = perf_evlist__id2evsel(evlist, ev->id);
+ if (evsel == NULL)
+ return -EINVAL;
+
+ switch (ev->type) {
+ case PERF_EVENT_UPDATE__UNIT:
+ evsel->unit = strdup(ev->data);
+ break;
+ case PERF_EVENT_UPDATE__NAME:
+ evsel->name = strdup(ev->data);
+ break;
+ case PERF_EVENT_UPDATE__SCALE:
+ ev_scale = (struct event_update_event_scale *) ev->data;
+ evsel->scale = ev_scale->scale;
+ break;
+ case PERF_EVENT_UPDATE__CPUS:
+ ev_cpus = (struct event_update_event_cpus *) ev->data;
+
+ map = cpu_map__new_data(&ev_cpus->cpus);
+ if (map)
+ evsel->own_cpus = map;
+ else
+ pr_err("failed to get event_update cpus\n");
+ default:
+ break;
+ }
+
+ return 0;
+}
+
int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd,
struct perf_evlist *evlist,
perf_event__handler_t process)
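
A hedged sketch of the variable-length record that the perf_event__synthesize_event_update_*() functions build and perf_event__process_event_update() consumes; the field names below are illustrative (the real struct is defined alongside the other perf record types), and the payload is u64-aligned by event_update_event__new() above:

#include <stdint.h>

struct event_update_sketch {
	struct {
		uint32_t type;  /* PERF_RECORD_EVENT_UPDATE */
		uint16_t misc;
		uint16_t size;  /* total record size, payload included */
	} header;
	uint64_t type;          /* PERF_EVENT_UPDATE__{UNIT,SCALE,NAME,CPUS} */
	uint64_t id;            /* evsel->id[0]: which event to update */
	char data[];            /* unit/name string, scale double, or cpu map */
};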
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 05f27cb6b7e3..cff9892452ee 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -31,6 +31,7 @@ enum {
HEADER_PMU_MAPPINGS,
HEADER_GROUP_DESC,
HEADER_AUXTRACE,
+ HEADER_STAT,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
@@ -105,8 +106,24 @@ int perf_event__synthesize_attr(struct perf_tool *tool,
int perf_event__synthesize_attrs(struct perf_tool *tool,
struct perf_session *session,
perf_event__handler_t process);
+int perf_event__synthesize_event_update_unit(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process);
+int perf_event__synthesize_event_update_scale(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process);
+int perf_event__synthesize_event_update_name(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process);
+int perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process);
int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
struct perf_evlist **pevlist);
+int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_evlist **pevlist);
+size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
int perf_event__synthesize_tracing_data(struct perf_tool *tool,
int fd, struct perf_evlist *evlist,
diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c
new file mode 100644
index 000000000000..dc1e41c9b054
--- /dev/null
+++ b/tools/perf/util/help-unknown-cmd.c
@@ -0,0 +1,103 @@
+#include "cache.h"
+#include <subcmd/help.h>
+#include "../builtin.h"
+#include "levenshtein.h"
+
+static int autocorrect;
+static struct cmdnames aliases;
+
+static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
+{
+ if (!strcmp(var, "help.autocorrect"))
+ autocorrect = perf_config_int(var, value);
+ /* Also use aliases for command lookup */
+ if (!prefixcmp(var, "alias."))
+ add_cmdname(&aliases, var + 6, strlen(var + 6));
+
+ return perf_default_config(var, value, cb);
+}
+
+static int levenshtein_compare(const void *p1, const void *p2)
+{
+ const struct cmdname *const *c1 = p1, *const *c2 = p2;
+ const char *s1 = (*c1)->name, *s2 = (*c2)->name;
+ int l1 = (*c1)->len;
+ int l2 = (*c2)->len;
+ return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
+}
+
+static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
+{
+ unsigned int i;
+
+ ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc);
+
+ for (i = 0; i < old->cnt; i++)
+ cmds->names[cmds->cnt++] = old->names[i];
+ zfree(&old->names);
+ old->cnt = 0;
+}
+
+const char *help_unknown_cmd(const char *cmd)
+{
+ unsigned int i, n = 0, best_similarity = 0;
+ struct cmdnames main_cmds, other_cmds;
+
+ memset(&main_cmds, 0, sizeof(main_cmds));
+ memset(&other_cmds, 0, sizeof(other_cmds));
+ memset(&aliases, 0, sizeof(aliases));
+
+ perf_config(perf_unknown_cmd_config, NULL);
+
+ load_command_list("perf-", &main_cmds, &other_cmds);
+
+ add_cmd_list(&main_cmds, &aliases);
+ add_cmd_list(&main_cmds, &other_cmds);
+ qsort(main_cmds.names, main_cmds.cnt,
+       sizeof(*main_cmds.names), cmdname_compare);
+ uniq(&main_cmds);
+
+ if (main_cmds.cnt) {
+ /* This reuses cmdname->len for similarity index */
+ for (i = 0; i < main_cmds.cnt; ++i)
+ main_cmds.names[i]->len =
+ levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
+
+ qsort(main_cmds.names, main_cmds.cnt,
+ sizeof(*main_cmds.names), levenshtein_compare);
+
+ best_similarity = main_cmds.names[0]->len;
+ n = 1;
+ while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
+ ++n;
+ }
+
+ if (autocorrect && n == 1) {
+ const char *assumed = main_cmds.names[0]->name;
+
+ main_cmds.names[0] = NULL;
+ clean_cmdnames(&main_cmds);
+ fprintf(stderr, "WARNING: You called a perf program named '%s', "
+ "which does not exist.\n"
+ "Continuing under the assumption that you meant '%s'\n",
+ cmd, assumed);
+ if (autocorrect > 0) {
+ fprintf(stderr, "in %0.1f seconds automatically...\n",
+ (float)autocorrect/10.0);
+ poll(NULL, 0, autocorrect * 100);
+ }
+ return assumed;
+ }
+
+ fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
+
+ if (main_cmds.cnt && best_similarity < 6) {
+ fprintf(stderr, "\nDid you mean %s?\n",
+ n < 2 ? "this": "one of these");
+
+ for (i = 0; i < n; i++)
+ fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
+ }
+
+ exit(1);
+}
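
The levenshtein(cmd, name, 0, 2, 1, 4) call above takes separate weights for swaps, substitutions, additions and deletions. As a minimal sketch of the underlying idea (plain weighted edit distance, transpositions omitted; not the helper's actual implementation):

#include <stdlib.h>
#include <string.h>

static int edit_distance(const char *s1, const char *s2,
			 int sub, int ins, int del)
{
	size_t len1 = strlen(s1), len2 = strlen(s2), i, j;
	int *prev = calloc(len2 + 1, sizeof(int));
	int *cur = calloc(len2 + 1, sizeof(int));
	int result = -1;

	if (!prev || !cur)
		goto out;

	for (j = 0; j <= len2; j++)
		prev[j] = j * ins;		/* build s2 prefix from nothing */

	for (i = 0; i < len1; i++) {
		cur[0] = (i + 1) * del;		/* erase the s1 prefix entirely */
		for (j = 0; j < len2; j++) {
			int cost = prev[j] + (s1[i] != s2[j]) * sub;

			if (prev[j + 1] + del < cost)
				cost = prev[j + 1] + del;
			if (cur[j] + ins < cost)
				cost = cur[j] + ins;
			cur[j + 1] = cost;
		}
		memcpy(prev, cur, (len2 + 1) * sizeof(int));
	}
	result = prev[len2];
out:
	free(prev);
	free(cur);
	return result;
}

help_unknown_cmd() then keeps every candidate whose distance ties the minimum and only autocorrects when that candidate is unique.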
diff --git a/tools/perf/util/help-unknown-cmd.h b/tools/perf/util/help-unknown-cmd.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/perf/util/help-unknown-cmd.h
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 4fd37d6708cb..c226303e3da0 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -254,6 +254,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
he_stat__decay(&he->stat);
if (symbol_conf.cumulate_callchain)
he_stat__decay(he->stat_acc);
+ decay_callchain(he->callchain);
diff = prev_period - he->stat.period;
@@ -270,6 +271,8 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
if (sort__need_collapse)
rb_erase(&he->rb_node_in, &hists->entries_collapsed);
+ else
+ rb_erase(&he->rb_node_in, hists->entries_in);
--hists->nr_entries;
if (!he->filtered)
@@ -367,6 +370,25 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
if (symbol_conf.use_callchain)
callchain_init(he->callchain);
+ if (he->raw_data) {
+ he->raw_data = memdup(he->raw_data, he->raw_size);
+
+ if (he->raw_data == NULL) {
+ map__put(he->ms.map);
+ if (he->branch_info) {
+ map__put(he->branch_info->from.map);
+ map__put(he->branch_info->to.map);
+ free(he->branch_info);
+ }
+ if (he->mem_info) {
+ map__put(he->mem_info->iaddr.map);
+ map__put(he->mem_info->daddr.map);
+ }
+ free(he->stat_acc);
+ free(he);
+ return NULL;
+ }
+ }
INIT_LIST_HEAD(&he->pairs.node);
thread__get(he->thread);
}
@@ -459,7 +481,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
struct symbol *sym_parent,
struct branch_info *bi,
struct mem_info *mi,
- u64 period, u64 weight, u64 transaction,
+ struct perf_sample *sample,
bool sample_self)
{
struct hist_entry entry = {
@@ -476,15 +498,17 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
.level = al->level,
.stat = {
.nr_events = 1,
- .period = period,
- .weight = weight,
+ .period = sample->period,
+ .weight = sample->weight,
},
.parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent) | al->filtered,
.hists = hists,
.branch_info = bi,
.mem_info = mi,
- .transaction = transaction,
+ .transaction = sample->transaction,
+ .raw_data = sample->raw_data,
+ .raw_size = sample->raw_size,
};
return hists__findnew_entry(hists, &entry, al, sample_self);
@@ -524,12 +548,13 @@ iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al
u64 cost;
struct mem_info *mi = iter->priv;
struct hists *hists = evsel__hists(iter->evsel);
+ struct perf_sample *sample = iter->sample;
struct hist_entry *he;
if (mi == NULL)
return -EINVAL;
- cost = iter->sample->weight;
+ cost = sample->weight;
if (!cost)
cost = 1;
@@ -540,8 +565,10 @@ iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al
* and this is indirectly achieved by passing period=weight here
* and the he_stat__add_period() function.
*/
+ sample->period = cost;
+
he = __hists__add_entry(hists, al, iter->parent, NULL, mi,
- cost, cost, 0, true);
+ sample, true);
if (!he)
return -ENOMEM;
@@ -628,6 +655,7 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
struct branch_info *bi;
struct perf_evsel *evsel = iter->evsel;
struct hists *hists = evsel__hists(evsel);
+ struct perf_sample *sample = iter->sample;
struct hist_entry *he = NULL;
int i = iter->curr;
int err = 0;
@@ -641,9 +669,11 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
* The report shows the percentage of total branches captured
* and not events sampled. Thus we use a pseudo period of 1.
*/
+ sample->period = 1;
+ sample->weight = bi->flags.cycles ? bi->flags.cycles : 1;
+
he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
- 1, bi->flags.cycles ? bi->flags.cycles : 1,
- 0, true);
+ sample, true);
if (he == NULL)
return -ENOMEM;
@@ -680,8 +710,7 @@ iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location
struct hist_entry *he;
he = __hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
- sample->period, sample->weight,
- sample->transaction, true);
+ sample, true);
if (he == NULL)
return -ENOMEM;
@@ -742,8 +771,7 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
int err = 0;
he = __hists__add_entry(hists, al, iter->parent, NULL, NULL,
- sample->period, sample->weight,
- sample->transaction, true);
+ sample, true);
if (he == NULL)
return -ENOMEM;
@@ -795,6 +823,8 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
.sym = al->sym,
},
.parent = iter->parent,
+ .raw_data = sample->raw_data,
+ .raw_size = sample->raw_size,
};
int i;
struct callchain_cursor cursor;
@@ -816,8 +846,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
}
he = __hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
- sample->period, sample->weight,
- sample->transaction, false);
+ sample, false);
if (he == NULL)
return -ENOMEM;
@@ -924,9 +953,6 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
int64_t cmp = 0;
perf_hpp__for_each_sort_list(fmt) {
- if (perf_hpp__should_skip(fmt))
- continue;
-
cmp = fmt->cmp(fmt, left, right);
if (cmp)
break;
@@ -942,9 +968,6 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
int64_t cmp = 0;
perf_hpp__for_each_sort_list(fmt) {
- if (perf_hpp__should_skip(fmt))
- continue;
-
cmp = fmt->collapse(fmt, left, right);
if (cmp)
break;
@@ -975,6 +998,8 @@ void hist_entry__delete(struct hist_entry *he)
if (he->srcfile && he->srcfile[0])
free(he->srcfile);
free_callchain(he->callchain);
+ free(he->trace_output);
+ free(he->raw_data);
free(he);
}
@@ -982,9 +1007,8 @@ void hist_entry__delete(struct hist_entry *he)
* collapse the histogram
*/
-static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
- struct rb_root *root,
- struct hist_entry *he)
+bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
+ struct rb_root *root, struct hist_entry *he)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -1024,7 +1048,7 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
return true;
}
-static struct rb_root *hists__get_rotate_entries_in(struct hists *hists)
+struct rb_root *hists__get_rotate_entries_in(struct hists *hists)
{
struct rb_root *root;
@@ -1088,7 +1112,7 @@ static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
int64_t cmp = 0;
perf_hpp__for_each_sort_list(fmt) {
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, a->hists))
continue;
cmp = fmt->sort(fmt, a, b);
@@ -1559,10 +1583,8 @@ int perf_hist_config(const char *var, const char *value)
return 0;
}
-static int hists_evsel__init(struct perf_evsel *evsel)
+int __hists__init(struct hists *hists)
{
- struct hists *hists = evsel__hists(evsel);
-
memset(hists, 0, sizeof(*hists));
hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
hists->entries_in = &hists->entries_in_array[0];
@@ -1573,6 +1595,43 @@ static int hists_evsel__init(struct perf_evsel *evsel)
return 0;
}
+static void hists__delete_remaining_entries(struct rb_root *root)
+{
+ struct rb_node *node;
+ struct hist_entry *he;
+
+ while (!RB_EMPTY_ROOT(root)) {
+ node = rb_first(root);
+ rb_erase(node, root);
+
+ he = rb_entry(node, struct hist_entry, rb_node_in);
+ hist_entry__delete(he);
+ }
+}
+
+static void hists__delete_all_entries(struct hists *hists)
+{
+ hists__delete_entries(hists);
+ hists__delete_remaining_entries(&hists->entries_in_array[0]);
+ hists__delete_remaining_entries(&hists->entries_in_array[1]);
+ hists__delete_remaining_entries(&hists->entries_collapsed);
+}
+
+static void hists_evsel__exit(struct perf_evsel *evsel)
+{
+ struct hists *hists = evsel__hists(evsel);
+
+ hists__delete_all_entries(hists);
+}
+
+static int hists_evsel__init(struct perf_evsel *evsel)
+{
+ struct hists *hists = evsel__hists(evsel);
+
+ __hists__init(hists);
+ return 0;
+}
+
/*
* XXX We probably need a hists_evsel__exit() to free the hist_entries
* stored in the rbtree...
@@ -1581,7 +1640,8 @@ static int hists_evsel__init(struct perf_evsel *evsel)
int hists__init(void)
{
int err = perf_evsel__object_config(sizeof(struct hists_evsel),
- hists_evsel__init, NULL);
+ hists_evsel__init,
+ hists_evsel__exit);
if (err)
fputs("FATAL ERROR: Couldn't setup hists class\n", stderr);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index a48a2078d288..d4ec4822a103 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -52,6 +52,7 @@ enum hist_column {
HISTC_MEM_IADDR_SYMBOL,
HISTC_TRANSACTION,
HISTC_CYCLES,
+ HISTC_TRACE,
HISTC_NR_COLS, /* Last entry */
};
@@ -114,8 +115,8 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
struct addr_location *al,
struct symbol *parent,
struct branch_info *bi,
- struct mem_info *mi, u64 period,
- u64 weight, u64 transaction,
+ struct mem_info *mi,
+ struct perf_sample *sample,
bool sample_self);
int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
int max_stack_depth, void *arg);
@@ -184,6 +185,11 @@ static inline struct hists *evsel__hists(struct perf_evsel *evsel)
}
int hists__init(void);
+int __hists__init(struct hists *hists);
+
+struct rb_root *hists__get_rotate_entries_in(struct hists *hists);
+bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
+ struct rb_root *root, struct hist_entry *he);
struct perf_hpp {
char *buf;
@@ -261,10 +267,20 @@ void perf_hpp__append_sort_keys(void);
bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
+bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format);
+bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists);
-static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format)
+static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format,
+ struct hists *hists)
{
- return format->elide;
+ if (format->elide)
+ return true;
+
+ if (perf_hpp__is_dynamic_entry(format) &&
+ !perf_hpp__defined_dynamic_entry(format, hists))
+ return true;
+
+ return false;
}
void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists);
diff --git a/tools/perf/util/include/linux/string.h b/tools/perf/util/include/linux/string.h
deleted file mode 100644
index 6f19c548ecc0..000000000000
--- a/tools/perf/util/include/linux/string.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#include <string.h>
-
-void *memdup(const void *src, size_t len);
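
The deleted header declared memdup(), which the hists change earlier in this series still uses; the declaration presumably now comes from the shared tools headers. Its contract, as a minimal sketch:

#include <stdlib.h>
#include <string.h>

/* Duplicate len bytes of src into a freshly malloc'ed buffer;
 * returns NULL on allocation failure, the caller frees. */
void *memdup(const void *src, size_t len)
{
	void *p = malloc(len);

	if (p)
		memcpy(p, src, len);
	return p;
}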
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 97f963a3dcb9..81a2eb77ba7f 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1744,7 +1744,7 @@ static void intel_pt_free(struct perf_session *session)
auxtrace_heap__free(&pt->heap);
intel_pt_free_events(session);
session->auxtrace = NULL;
- thread__delete(pt->unknown_thread);
+ thread__put(pt->unknown_thread);
free(pt);
}
@@ -2153,7 +2153,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
return 0;
err_delete_thread:
- thread__delete(pt->unknown_thread);
+ thread__zput(pt->unknown_thread);
err_free_queues:
intel_pt_log_disable();
auxtrace_queues__free(&pt->queues);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5ef90be2a249..ad79297c76c8 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -25,6 +25,7 @@ static void dsos__init(struct dsos *dsos)
int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
{
+ memset(machine, 0, sizeof(*machine));
map_groups__init(&machine->kmaps, machine);
RB_CLEAR_NODE(&machine->rb_node);
dsos__init(&machine->dsos);
@@ -44,6 +45,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
machine->comm_exec = false;
machine->kernel_start = 0;
+ memset(machine->vmlinux_maps, 0, sizeof(machine->vmlinux_maps));
+
machine->root_dir = strdup(root_dir);
if (machine->root_dir == NULL)
return -ENOMEM;
@@ -91,6 +94,7 @@ static void dsos__purge(struct dsos *dsos)
list_for_each_entry_safe(pos, n, &dsos->head, node) {
RB_CLEAR_NODE(&pos->rb_node);
+ pos->root = NULL;
list_del_init(&pos->node);
dso__put(pos);
}
@@ -121,6 +125,7 @@ void machine__delete_threads(struct machine *machine)
void machine__exit(struct machine *machine)
{
+ machine__destroy_kernel_maps(machine);
map_groups__exit(&machine->kmaps);
dsos__exit(&machine->dsos);
machine__exit_vdso(machine);
@@ -347,13 +352,18 @@ static void machine__update_thread_pid(struct machine *machine,
}
th->mg = map_groups__get(leader->mg);
-
+out_put:
+ thread__put(leader);
return;
-
out_err:
pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid);
+ goto out_put;
}
+/*
+ * The caller must eventually drop the thread->refcnt returned by a
+ * successful lookup or by inserting a new thread.
+ */
static struct thread *____machine__findnew_thread(struct machine *machine,
pid_t pid, pid_t tid,
bool create)
@@ -371,7 +381,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
if (th != NULL) {
if (th->tid == tid) {
machine__update_thread_pid(machine, th, pid);
- return th;
+ return thread__get(th);
}
machine->last_match = NULL;
@@ -384,7 +394,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
if (th->tid == tid) {
machine->last_match = th;
machine__update_thread_pid(machine, th, pid);
- return th;
+ return thread__get(th);
}
if (tid < th->tid)
@@ -412,7 +422,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
if (thread__init_map_groups(th, machine)) {
rb_erase_init(&th->rb_node, &machine->threads);
RB_CLEAR_NODE(&th->rb_node);
- thread__delete(th);
+ thread__put(th);
return NULL;
}
/*
@@ -436,7 +446,7 @@ struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
struct thread *th;
pthread_rwlock_wrlock(&machine->threads_lock);
- th = thread__get(__machine__findnew_thread(machine, pid, tid));
+ th = __machine__findnew_thread(machine, pid, tid);
pthread_rwlock_unlock(&machine->threads_lock);
return th;
}
@@ -446,7 +456,7 @@ struct thread *machine__find_thread(struct machine *machine, pid_t pid,
{
struct thread *th;
pthread_rwlock_rdlock(&machine->threads_lock);
- th = thread__get(____machine__findnew_thread(machine, pid, tid, false));
+ th = ____machine__findnew_thread(machine, pid, tid, false);
pthread_rwlock_unlock(&machine->threads_lock);
return th;
}
@@ -559,11 +569,29 @@ int machine__process_switch_event(struct machine *machine __maybe_unused,
return 0;
}
+static void dso__adjust_kmod_long_name(struct dso *dso, const char *filename)
+{
+ const char *dup_filename;
+
+ if (!filename || !dso || !dso->long_name)
+ return;
+ if (dso->long_name[0] != '[')
+ return;
+ if (!strchr(filename, '/'))
+ return;
+
+ dup_filename = strdup(filename);
+ if (!dup_filename)
+ return;
+
+ dso__set_long_name(dso, dup_filename, true);
+}
+
struct map *machine__findnew_module_map(struct machine *machine, u64 start,
const char *filename)
{
struct map *map = NULL;
- struct dso *dso;
+ struct dso *dso = NULL;
struct kmod_path m;
if (kmod_path__parse_name(&m, filename))
@@ -571,8 +599,15 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION,
m.name);
- if (map)
+ if (map) {
+ /*
+ * If the map's dso is an offline module, give dso__load()
+ * a chance to find the file path of that module by fixing
+ * long_name.
+ */
+ dso__adjust_kmod_long_name(map->dso, filename);
goto out;
+ }
dso = machine__findnew_module_dso(machine, &m, filename);
if (dso == NULL)
@@ -584,7 +619,11 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
map_groups__insert(&machine->kmaps, map);
+ /* Put the map here because map_groups__insert already got it */
+ map__put(map);
out:
+ /* put the dso here, corresponding to machine__findnew_module_dso */
+ dso__put(dso);
free(m.name);
return map;
}
@@ -739,6 +778,9 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
enum map_type type;
u64 start = machine__get_running_kernel_start(machine, NULL);
+ /* In case the kernel map is being renewed, destroy the previous one */
+ machine__destroy_kernel_maps(machine);
+
for (type = 0; type < MAP__NR_TYPES; ++type) {
struct kmap *kmap;
struct map *map;
@@ -787,6 +829,7 @@ void machine__destroy_kernel_maps(struct machine *machine)
kmap->ref_reloc_sym = NULL;
}
+ map__put(machine->vmlinux_maps[type]);
machine->vmlinux_maps[type] = NULL;
}
}
@@ -1083,11 +1126,14 @@ int machine__create_kernel_maps(struct machine *machine)
struct dso *kernel = machine__get_kernel(machine);
const char *name;
u64 addr = machine__get_running_kernel_start(machine, &name);
- if (!addr)
+ int ret;
+
+ if (!addr || kernel == NULL)
return -1;
- if (kernel == NULL ||
- __machine__create_kernel_maps(machine, kernel) < 0)
+ ret = __machine__create_kernel_maps(machine, kernel);
+ dso__put(kernel);
+ if (ret < 0)
return -1;
if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
@@ -1608,6 +1654,8 @@ static int add_callchain_ip(struct thread *thread,
}
}
+ if (symbol_conf.hide_unresolved && al.sym == NULL)
+ return 0;
return callchain_cursor_append(&callchain_cursor, al.addr, al.map, al.sym);
}
@@ -1862,6 +1910,9 @@ check_calls:
static int unwind_entry(struct unwind_entry *entry, void *arg)
{
struct callchain_cursor *cursor = arg;
+
+ if (symbol_conf.hide_unresolved && entry->sym == NULL)
+ return 0;
return callchain_cursor_append(cursor, entry->ip,
entry->map, entry->sym);
}
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index afc6b56cf749..171b6d10a04b 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -26,8 +26,8 @@ const char *map_type__name[MAP__NR_TYPES] = {
static inline int is_anon_memory(const char *filename)
{
return !strcmp(filename, "//anon") ||
- !strcmp(filename, "/dev/zero (deleted)") ||
- !strcmp(filename, "/anon_hugepage (deleted)");
+ !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) ||
+ !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1);
}
static inline int is_no_dso_memory(const char *filename)
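
The strcmp() to strncmp() switch above turns the check into a prefix match, so "/dev/zero" is recognized with or without the " (deleted)" suffix; sizeof("literal") - 1 is the literal's length, computed at compile time. A small standalone illustration:

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *names[] = { "/dev/zero", "/dev/zero (deleted)", "//anon" };
	int i;

	for (i = 0; i < 3; i++) {
		int anon = !strcmp(names[i], "//anon") ||
			   !strncmp(names[i], "/dev/zero", sizeof("/dev/zero") - 1) ||
			   !strncmp(names[i], "/anon_hugepage", sizeof("/anon_hugepage") - 1);
		printf("%-20s -> anon=%d\n", names[i], anon); /* all print anon=1 */
	}
	return 0;
}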
@@ -691,6 +691,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
__map_groups__insert(pos->groups, before);
if (verbose >= 2)
map__fprintf(before, fp);
+ map__put(before);
}
if (map->end < pos->end) {
@@ -705,6 +706,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
__map_groups__insert(pos->groups, after);
if (verbose >= 2)
map__fprintf(after, fp);
+ map__put(after);
}
put_map:
map__put(pos);
@@ -742,6 +744,7 @@ int map_groups__clone(struct map_groups *mg,
if (new == NULL)
goto out_unlock;
map_groups__insert(mg, new);
+ map__put(new);
}
err = 0;
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
index 355eecf6bf59..afc088dd7d20 100644
--- a/tools/perf/util/parse-branch-options.c
+++ b/tools/perf/util/parse-branch-options.c
@@ -1,7 +1,7 @@
#include "perf.h"
#include "util/util.h"
#include "util/debug.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-branch-options.h"
#define BRANCH_OPT(n, m) \
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index e48d9da75707..4f7b0efdde2f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -4,9 +4,9 @@
#include "../perf.h"
#include "evlist.h"
#include "evsel.h"
-#include "parse-options.h"
+#include <subcmd/parse-options.h>
#include "parse-events.h"
-#include "exec_cmd.h"
+#include <subcmd/exec-cmd.h>
#include "string.h"
#include "symbol.h"
#include "cache.h"
@@ -124,6 +124,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
.symbol = "dummy",
.alias = "",
},
+ [PERF_COUNT_SW_BPF_OUTPUT] = {
+ .symbol = "bpf-output",
+ .alias = "",
+ },
};
#define __PERF_EVENT_FIELD(config, name) \
@@ -1879,7 +1883,7 @@ restart:
for (i = 0; i < max; i++, syms++) {
- if (event_glob != NULL &&
+ if (event_glob != NULL && syms->symbol != NULL &&
!(strglobmatch(syms->symbol, event_glob) ||
(syms->alias && strglobmatch(syms->alias, event_glob))))
continue;
diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
index 4f2c1c255d81..646ecf736aad 100644
--- a/tools/perf/util/parse-regs-options.c
+++ b/tools/perf/util/parse-regs-options.c
@@ -1,7 +1,7 @@
#include "perf.h"
#include "util/util.h"
#include "util/debug.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-regs-options.h"
int
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index 5d13cb45b317..3654d964e49d 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@@ -22,24 +22,6 @@ static const char *get_perf_dir(void)
return ".";
}
-/*
- * If libc has strlcpy() then that version will override this
- * implementation:
- */
-size_t __weak strlcpy(char *dest, const char *src, size_t size)
-{
- size_t ret = strlen(src);
-
- if (size) {
- size_t len = (ret >= size) ? size - 1 : ret;
-
- memcpy(dest, src, len);
- dest[len] = '\0';
- }
-
- return ret;
-}
-
static char *get_pathname(void)
{
static char pathname_array[4][PATH_MAX];
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index e4b173dec4b9..b597bcc8fc78 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -220,6 +220,7 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
alias->scale = 1.0;
alias->unit[0] = '\0';
alias->per_pkg = false;
+ alias->snapshot = false;
ret = parse_events_terms(&alias->terms, val);
if (ret) {
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 03875f9154e7..93996ec4bbe3 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2326,8 +2326,11 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
goto out;
if (!allow_suffix) {
- pr_warning("Error: event \"%s\" already exists. "
- "(Use -f to force duplicates.)\n", buf);
+ pr_warning("Error: event \"%s\" already exists.\n"
+ " Hint: Remove existing event by 'perf probe -d'\n"
+ " or force duplicates by 'perf probe -f'\n"
+ " or set 'force=yes' in BPF source.\n",
+ buf);
ret = -EEXIST;
goto out;
}
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index bd8f03de5e40..2be10fb27172 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -654,6 +654,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod,
static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
{
Dwarf_Attribute fb_attr;
+ Dwarf_Frame *frame = NULL;
size_t nops;
int ret;
@@ -686,11 +687,11 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
#if _ELFUTILS_PREREQ(0, 142)
} else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
pf->cfi != NULL) {
- Dwarf_Frame *frame;
if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 ||
dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
pr_warning("Failed to get call frame on 0x%jx\n",
(uintmax_t)pf->addr);
+ free(frame);
return -ENOENT;
}
#endif
@@ -699,7 +700,8 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
/* Call finder's callback handler */
ret = pf->callback(sc_die, pf);
- /* *pf->fb_ops will be cached in libdw. Don't free it. */
+ /* Since *pf->fb_ops can be a part of frame, we should free it here. */
+ free(frame);
pf->fb_ops = NULL;
return ret;
@@ -1183,7 +1185,7 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
container_of(pf, struct trace_event_finder, pf);
struct perf_probe_point *pp = &pf->pev->point;
struct probe_trace_event *tev;
- struct perf_probe_arg *args;
+ struct perf_probe_arg *args = NULL;
int ret, i;
/* Check number of tevs */
@@ -1198,19 +1200,23 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
ret = convert_to_trace_point(&pf->sp_die, tf->mod, pf->addr,
pp->retprobe, pp->function, &tev->point);
if (ret < 0)
- return ret;
+ goto end;
tev->point.realname = strdup(dwarf_diename(sc_die));
- if (!tev->point.realname)
- return -ENOMEM;
+ if (!tev->point.realname) {
+ ret = -ENOMEM;
+ goto end;
+ }
pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
tev->point.offset);
/* Expand special probe argument if exist */
args = zalloc(sizeof(struct perf_probe_arg) * MAX_PROBE_ARGS);
- if (args == NULL)
- return -ENOMEM;
+ if (args == NULL) {
+ ret = -ENOMEM;
+ goto end;
+ }
ret = expand_probe_args(sc_die, pf, args);
if (ret < 0)
@@ -1234,6 +1240,10 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
}
end:
+ if (ret) {
+ clear_probe_trace_event(tev);
+ tf->ntevs--;
+ }
free(args);
return ret;
}
@@ -1246,7 +1256,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
struct trace_event_finder tf = {
.pf = {.pev = pev, .callback = add_probe_trace_event},
.max_tevs = probe_conf.max_probes, .mod = dbg->mod};
- int ret;
+ int ret, i;
/* Allocate result tevs array */
*tevs = zalloc(sizeof(struct probe_trace_event) * tf.max_tevs);
@@ -1258,6 +1268,8 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
ret = debuginfo__find_probes(dbg, &tf.pf);
if (ret < 0) {
+ for (i = 0; i < tf.ntevs; i++)
+ clear_probe_trace_event(&tf.tevs[i]);
zfree(tevs);
return ret;
}
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index 51be28b1bca2..8162ba0e2e57 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -10,6 +10,8 @@ util/ctype.c
util/evlist.c
util/evsel.c
util/cpumap.c
+../lib/bitmap.c
+../lib/find_bit.c
../lib/hweight.c
util/thread_map.c
util/util.c
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index a8e825fca42a..d72fafc1c800 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -41,6 +41,9 @@
#include "../thread-stack.h"
#include "../trace-event.h"
#include "../machine.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "stat.h"
PyMODINIT_FUNC initperf_trace_context(void);
@@ -859,6 +862,104 @@ static void python_process_event(union perf_event *event,
}
}
+static void get_handler_name(char *str, size_t size,
+ struct perf_evsel *evsel)
+{
+ char *p = str;
+
+ scnprintf(str, size, "stat__%s", perf_evsel__name(evsel));
+
+ while ((p = strchr(p, ':'))) {
+ *p = '_';
+ p++;
+ }
+}
+
+static void
+process_stat(struct perf_evsel *counter, int cpu, int thread, u64 tstamp,
+ struct perf_counts_values *count)
+{
+ PyObject *handler, *t;
+ static char handler_name[256];
+ int n = 0;
+
+ t = PyTuple_New(MAX_FIELDS);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ get_handler_name(handler_name, sizeof(handler_name),
+ counter);
+
+ handler = get_handler(handler_name);
+ if (!handler) {
+ pr_debug("can't find python handler %s\n", handler_name);
+ return;
+ }
+
+ PyTuple_SetItem(t, n++, PyInt_FromLong(cpu));
+ PyTuple_SetItem(t, n++, PyInt_FromLong(thread));
+
+ tuple_set_u64(t, n++, tstamp);
+ tuple_set_u64(t, n++, count->val);
+ tuple_set_u64(t, n++, count->ena);
+ tuple_set_u64(t, n++, count->run);
+
+ if (_PyTuple_Resize(&t, n) == -1)
+ Py_FatalError("error resizing Python tuple");
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
+static void python_process_stat(struct perf_stat_config *config,
+ struct perf_evsel *counter, u64 tstamp)
+{
+ struct thread_map *threads = counter->threads;
+ struct cpu_map *cpus = counter->cpus;
+ int cpu, thread;
+
+ if (config->aggr_mode == AGGR_GLOBAL) {
+ process_stat(counter, -1, -1, tstamp,
+ &counter->counts->aggr);
+ return;
+ }
+
+ for (thread = 0; thread < threads->nr; thread++) {
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ process_stat(counter, cpus->map[cpu],
+ thread_map__pid(threads, thread), tstamp,
+ perf_counts(counter->counts, cpu, thread));
+ }
+ }
+}
+
+static void python_process_stat_interval(u64 tstamp)
+{
+ PyObject *handler, *t;
+ static const char handler_name[] = "stat__interval";
+ int n = 0;
+
+ t = PyTuple_New(MAX_FIELDS);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ handler = get_handler(handler_name);
+ if (!handler) {
+ pr_debug("can't find python handler %s\n", handler_name);
+ return;
+ }
+
+ tuple_set_u64(t, n++, tstamp);
+
+ if (_PyTuple_Resize(&t, n) == -1)
+ Py_FatalError("error resizing Python tuple");
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
static int run_start_sub(void)
{
main_module = PyImport_AddModule("__main__");
@@ -1201,10 +1302,12 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
}
struct scripting_ops python_scripting_ops = {
- .name = "Python",
- .start_script = python_start_script,
- .flush_script = python_flush_script,
- .stop_script = python_stop_script,
- .process_event = python_process_event,
- .generate_script = python_generate_script,
+ .name = "Python",
+ .start_script = python_start_script,
+ .flush_script = python_flush_script,
+ .stop_script = python_stop_script,
+ .process_event = python_process_event,
+ .process_stat = python_process_stat,
+ .process_stat_interval = python_process_stat_interval,
+ .generate_script = python_generate_script,
};
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c35ffdd360fe..d5636ba94b20 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -17,6 +17,7 @@
#include "asm/bug.h"
#include "auxtrace.h"
#include "thread-stack.h"
+#include "stat.h"
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
@@ -36,6 +37,9 @@ static int perf_session__open(struct perf_session *session)
if (perf_data_file__is_pipe(file))
return 0;
+ if (perf_header__has_feat(&session->header, HEADER_STAT))
+ return 0;
+
if (!perf_evlist__valid_sample_type(session->evlist)) {
pr_err("non matching sample_type\n");
return -1;
@@ -205,6 +209,18 @@ static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused,
return 0;
}
+static int process_event_synth_event_update_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_evlist **pevlist
+ __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_event_update(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
static int process_event_sample_stub(struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
@@ -296,6 +312,67 @@ int process_event_auxtrace_error_stub(struct perf_tool *tool __maybe_unused,
return 0;
}
+
+static
+int process_event_thread_map_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_thread_map(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static
+int process_event_cpu_map_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_cpu_map(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static
+int process_event_stat_config_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_stat_config(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_stat_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *perf_session
+ __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_stat(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_stat_round_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *perf_session
+ __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_stat_round(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
void perf_tool__fill_defaults(struct perf_tool *tool)
{
if (tool->sample == NULL)
@@ -328,6 +405,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->unthrottle = process_event_stub;
if (tool->attr == NULL)
tool->attr = process_event_synth_attr_stub;
+ if (tool->event_update == NULL)
+ tool->event_update = process_event_synth_event_update_stub;
if (tool->tracing_data == NULL)
tool->tracing_data = process_event_synth_tracing_data_stub;
if (tool->build_id == NULL)
@@ -346,6 +425,16 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->auxtrace = process_event_auxtrace_stub;
if (tool->auxtrace_error == NULL)
tool->auxtrace_error = process_event_auxtrace_error_stub;
+ if (tool->thread_map == NULL)
+ tool->thread_map = process_event_thread_map_stub;
+ if (tool->cpu_map == NULL)
+ tool->cpu_map = process_event_cpu_map_stub;
+ if (tool->stat_config == NULL)
+ tool->stat_config = process_event_stat_config_stub;
+ if (tool->stat == NULL)
+ tool->stat = process_stat_stub;
+ if (tool->stat_round == NULL)
+ tool->stat_round = process_stat_round_stub;
}
static void swap_sample_id_all(union perf_event *event, void *data)
@@ -569,6 +658,13 @@ static void perf_event__hdr_attr_swap(union perf_event *event,
mem_bswap_64(event->attr.id, size);
}
+static void perf_event__event_update_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->event_update.type = bswap_64(event->event_update.type);
+ event->event_update.id = bswap_64(event->event_update.id);
+}
+
static void perf_event__event_type_swap(union perf_event *event,
bool sample_id_all __maybe_unused)
{
@@ -616,6 +712,81 @@ static void perf_event__auxtrace_error_swap(union perf_event *event,
event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip);
}
+static void perf_event__thread_map_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ unsigned i;
+
+ event->thread_map.nr = bswap_64(event->thread_map.nr);
+
+ for (i = 0; i < event->thread_map.nr; i++)
+ event->thread_map.entries[i].pid = bswap_64(event->thread_map.entries[i].pid);
+}
+
+static void perf_event__cpu_map_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ struct cpu_map_data *data = &event->cpu_map.data;
+ struct cpu_map_entries *cpus;
+ struct cpu_map_mask *mask;
+ unsigned i;
+
+ data->type = bswap_64(data->type);
+
+ switch (data->type) {
+ case PERF_CPU_MAP__CPUS:
+ cpus = (struct cpu_map_entries *)data->data;
+
+ cpus->nr = bswap_16(cpus->nr);
+
+ for (i = 0; i < cpus->nr; i++)
+ cpus->cpu[i] = bswap_16(cpus->cpu[i]);
+ break;
+ case PERF_CPU_MAP__MASK:
+ mask = (struct cpu_map_mask *) data->data;
+
+ mask->nr = bswap_16(mask->nr);
+ mask->long_size = bswap_16(mask->long_size);
+
+ switch (mask->long_size) {
+ case 4: mem_bswap_32(&mask->mask, mask->nr); break;
+ case 8: mem_bswap_64(&mask->mask, mask->nr); break;
+ default:
+ pr_err("cpu_map swap: unsupported long size\n");
+ }
+ default:
+ break;
+ }
+}
+
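The swaps above lean on mem_bswap_64() to fix up whole arrays; as a hedged sketch of that helper's assumed behavior (swap u64s in place, with any byte remainder still triggering one final swap):

#include <byteswap.h>
#include <stdint.h>

typedef uint64_t u64;

static void mem_bswap_64(void *src, int byte_size)
{
	u64 *m = src;

	while (byte_size > 0) {
		*m = bswap_64(*m);
		byte_size -= (int)sizeof(u64);
		++m;
	}
}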
+static void perf_event__stat_config_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ u64 size;
+
+ size = bswap_64(event->stat_config.nr) * sizeof(event->stat_config.data[0]);
+ size += 1; /* nr itself: the remainder makes mem_bswap_64() swap one extra u64 */
+ mem_bswap_64(&event->stat_config.nr, size);
+}
+
+static void perf_event__stat_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->stat.id = bswap_64(event->stat.id);
+ event->stat.thread = bswap_32(event->stat.thread);
+ event->stat.cpu = bswap_32(event->stat.cpu);
+ event->stat.val = bswap_64(event->stat.val);
+ event->stat.ena = bswap_64(event->stat.ena);
+ event->stat.run = bswap_64(event->stat.run);
+}
+
+static void perf_event__stat_round_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->stat_round.type = bswap_64(event->stat_round.type);
+ event->stat_round.time = bswap_64(event->stat_round.time);
+}
+
typedef void (*perf_event__swap_op)(union perf_event *event,
bool sample_id_all);
@@ -643,6 +814,12 @@ static perf_event__swap_op perf_event__swap_ops[] = {
[PERF_RECORD_AUXTRACE_INFO] = perf_event__auxtrace_info_swap,
[PERF_RECORD_AUXTRACE] = perf_event__auxtrace_swap,
[PERF_RECORD_AUXTRACE_ERROR] = perf_event__auxtrace_error_swap,
+ [PERF_RECORD_THREAD_MAP] = perf_event__thread_map_swap,
+ [PERF_RECORD_CPU_MAP] = perf_event__cpu_map_swap,
+ [PERF_RECORD_STAT_CONFIG] = perf_event__stat_config_swap,
+ [PERF_RECORD_STAT] = perf_event__stat_swap,
+ [PERF_RECORD_STAT_ROUND] = perf_event__stat_round_swap,
+ [PERF_RECORD_EVENT_UPDATE] = perf_event__event_update_swap,
[PERF_RECORD_HEADER_MAX] = NULL,
};
@@ -1154,6 +1331,8 @@ static s64 perf_session__process_user_event(struct perf_session *session,
perf_session__set_comm_exec(session);
}
return err;
+ case PERF_RECORD_EVENT_UPDATE:
+ return tool->event_update(tool, event, &session->evlist);
case PERF_RECORD_HEADER_EVENT_TYPE:
/*
* Depreceated, but we need to handle it for sake
@@ -1179,6 +1358,16 @@ static s64 perf_session__process_user_event(struct perf_session *session,
case PERF_RECORD_AUXTRACE_ERROR:
perf_session__auxtrace_error_inc(session, event);
return tool->auxtrace_error(tool, event, session);
+ case PERF_RECORD_THREAD_MAP:
+ return tool->thread_map(tool, event, session);
+ case PERF_RECORD_CPU_MAP:
+ return tool->cpu_map(tool, event, session);
+ case PERF_RECORD_STAT_CONFIG:
+ return tool->stat_config(tool, event, session);
+ case PERF_RECORD_STAT:
+ return tool->stat(tool, event, session);
+ case PERF_RECORD_STAT_ROUND:
+ return tool->stat_round(tool, event, session);
default:
return -EINVAL;
}
@@ -1311,17 +1500,20 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
return machine__findnew_thread(&session->machines.host, -1, pid);
}
-struct thread *perf_session__register_idle_thread(struct perf_session *session)
+int perf_session__register_idle_thread(struct perf_session *session)
{
struct thread *thread;
+ int err = 0;
thread = machine__findnew_thread(&session->machines.host, 0, 0);
if (thread == NULL || thread__set_comm(thread, "swapper", 0)) {
pr_err("problem inserting idle task.\n");
- thread = NULL;
+ err = -1;
}
- return thread;
+ /* machine__findnew_thread() got the thread, so put it */
+ thread__put(thread);
+ return err;
}
static void perf_session__warn_about_errors(const struct perf_session *session)
@@ -1676,7 +1868,7 @@ int perf_session__process_events(struct perf_session *session)
u64 size = perf_data_file__size(session->file);
int err;
- if (perf_session__register_idle_thread(session) == NULL)
+ if (perf_session__register_idle_thread(session) < 0)
return -ENOMEM;
if (!perf_data_file__is_pipe(session->file))
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 3e900c0efc73..5f792e35d4c1 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -89,7 +89,7 @@ struct machine *perf_session__findnew_machine(struct perf_session *session, pid_
}
struct thread *perf_session__findnew(struct perf_session *session, pid_t pid);
-struct thread *perf_session__register_idle_thread(struct perf_session *session);
+int perf_session__register_idle_thread(struct perf_session *session);
size_t perf_session__fprintf(struct perf_session *session, FILE *fp);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 2d8ccd4d9e1b..ec722346e6ff 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -4,6 +4,8 @@
#include "comm.h"
#include "symbol.h"
#include "evsel.h"
+#include "evlist.h"
+#include <traceevent/event-parse.h>
regex_t parent_regex;
const char default_parent_pattern[] = "^sys_|^do_page_fault";
@@ -13,6 +15,7 @@ const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cy
const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
const char default_top_sort_order[] = "dso,symbol";
const char default_diff_sort_order[] = "dso,symbol";
+const char default_tracepoint_sort_order[] = "trace";
const char *sort_order;
const char *field_order;
regex_t ignore_callees_regex;
@@ -443,6 +446,70 @@ struct sort_entry sort_socket = {
.se_width_idx = HISTC_SOCKET,
};
+/* --sort trace */
+
+static char *get_trace_output(struct hist_entry *he)
+{
+ struct trace_seq seq;
+ struct perf_evsel *evsel;
+ struct pevent_record rec = {
+ .data = he->raw_data,
+ .size = he->raw_size,
+ };
+
+ evsel = hists_to_evsel(he->hists);
+
+ trace_seq_init(&seq);
+ if (symbol_conf.raw_trace) {
+ pevent_print_fields(&seq, he->raw_data, he->raw_size,
+ evsel->tp_format);
+ } else {
+ pevent_event_info(&seq, evsel->tp_format, &rec);
+ }
+ return seq.buffer;
+}
+
+static int64_t
+sort__trace_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct perf_evsel *evsel;
+
+ evsel = hists_to_evsel(left->hists);
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+ return 0;
+
+ if (left->trace_output == NULL)
+ left->trace_output = get_trace_output(left);
+ if (right->trace_output == NULL)
+ right->trace_output = get_trace_output(right);
+
+ hists__new_col_len(left->hists, HISTC_TRACE, strlen(left->trace_output));
+ hists__new_col_len(right->hists, HISTC_TRACE, strlen(right->trace_output));
+
+ return strcmp(right->trace_output, left->trace_output);
+}
+
+static int hist_entry__trace_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ struct perf_evsel *evsel;
+
+ evsel = hists_to_evsel(he->hists);
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+ return scnprintf(bf, size, "%-*.*s", width, width, "N/A");
+
+ if (he->trace_output == NULL)
+ he->trace_output = get_trace_output(he);
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, he->trace_output);
+}
+
+struct sort_entry sort_trace = {
+ .se_header = "Trace output",
+ .se_cmp = sort__trace_cmp,
+ .se_snprintf = hist_entry__trace_snprintf,
+ .se_width_idx = HISTC_TRACE,
+};
+
/* sort keys for branch stacks */
static int64_t
@@ -1312,6 +1379,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
DIM(SORT_TRANSACTION, "transaction", sort_transaction),
+ DIM(SORT_TRACE, "trace", sort_trace),
};
#undef DIM
@@ -1529,6 +1597,455 @@ static int __sort_dimension__add_hpp_output(struct sort_dimension *sd)
return 0;
}
+struct hpp_dynamic_entry {
+ struct perf_hpp_fmt hpp;
+ struct perf_evsel *evsel;
+ struct format_field *field;
+ unsigned dynamic_len;
+ bool raw_trace;
+};
+
+static int hde_width(struct hpp_dynamic_entry *hde)
+{
+ if (!hde->hpp.len) {
+ int len = hde->dynamic_len;
+ int namelen = strlen(hde->field->name);
+ int fieldlen = hde->field->size;
+
+ if (namelen > len)
+ len = namelen;
+
+ if (!(hde->field->flags & FIELD_IS_STRING)) {
+ /* length needed to print the numbers as hex */
+ fieldlen = hde->field->size * 2 + 2;
+ }
+ if (fieldlen > len)
+ len = fieldlen;
+
+ hde->hpp.len = len;
+ }
+ return hde->hpp.len;
+}
+
+static void update_dynamic_len(struct hpp_dynamic_entry *hde,
+ struct hist_entry *he)
+{
+ char *str, *pos;
+ struct format_field *field = hde->field;
+ size_t namelen;
+ bool last = false;
+
+ if (hde->raw_trace)
+ return;
+
+ /* parse pretty print result and update max length */
+ if (!he->trace_output)
+ he->trace_output = get_trace_output(he);
+
+ namelen = strlen(field->name);
+ str = he->trace_output;
+
+ while (str) {
+ pos = strchr(str, ' ');
+ if (pos == NULL) {
+ last = true;
+ pos = str + strlen(str);
+ }
+
+ if (!strncmp(str, field->name, namelen)) {
+ size_t len;
+
+ str += namelen + 1;
+ len = pos - str;
+
+ if (len > hde->dynamic_len)
+ hde->dynamic_len = len;
+ break;
+ }
+
+ if (last)
+ str = NULL;
+ else
+ str = pos + 1;
+ }
+}
+
+static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct perf_evsel *evsel __maybe_unused)
+{
+ struct hpp_dynamic_entry *hde;
+ size_t len = fmt->user_len;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (!len)
+ len = hde_width(hde);
+
+ return scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, hde->field->name);
+}
+
+static int __sort__hde_width(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp __maybe_unused,
+ struct perf_evsel *evsel __maybe_unused)
+{
+ struct hpp_dynamic_entry *hde;
+ size_t len = fmt->user_len;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (!len)
+ len = hde_width(hde);
+
+ return len;
+}
+
+bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists)
+{
+ struct hpp_dynamic_entry *hde;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ return hists_to_evsel(hists) == hde->evsel;
+}
+
+static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct hpp_dynamic_entry *hde;
+ size_t len = fmt->user_len;
+ char *str, *pos;
+ struct format_field *field;
+ size_t namelen;
+ bool last = false;
+ int ret;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (!len)
+ len = hde_width(hde);
+
+ if (hde->raw_trace)
+ goto raw_field;
+
+ field = hde->field;
+ namelen = strlen(field->name);
+ str = he->trace_output;
+
+ while (str) {
+ pos = strchr(str, ' ');
+ if (pos == NULL) {
+ last = true;
+ pos = str + strlen(str);
+ }
+
+ if (!strncmp(str, field->name, namelen)) {
+ str += namelen + 1;
+ str = strndup(str, pos - str);
+
+ if (str == NULL)
+ return scnprintf(hpp->buf, hpp->size,
+ "%*.*s", len, len, "ERROR");
+ break;
+ }
+
+ if (last)
+ str = NULL;
+ else
+ str = pos + 1;
+ }
+
+ if (str == NULL) {
+ struct trace_seq seq;
+raw_field:
+ trace_seq_init(&seq);
+ pevent_print_field(&seq, he->raw_data, hde->field);
+ str = seq.buffer;
+ }
+
+ ret = scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, str);
+ free(str);
+ return ret;
+}
+
+static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct hpp_dynamic_entry *hde;
+ struct format_field *field;
+ unsigned offset, size;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ field = hde->field;
+ if (field->flags & FIELD_IS_DYNAMIC) {
+ unsigned long long dyn;
+
+ pevent_read_number_field(field, a->raw_data, &dyn);
+ offset = dyn & 0xffff;
+ size = (dyn >> 16) & 0xffff;
+
+ /* record max width for output */
+ if (size > hde->dynamic_len)
+ hde->dynamic_len = size;
+ } else {
+ offset = field->offset;
+ size = field->size;
+
+ update_dynamic_len(hde, a);
+ update_dynamic_len(hde, b);
+ }
+
+ return memcmp(a->raw_data + offset, b->raw_data + offset, size);
+}
+
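The FIELD_IS_DYNAMIC branch above assumes the usual ftrace encoding of a dynamic-array descriptor: payload offset in the low 16 bits, payload length in the next 16. A tiny standalone decode (the descriptor value is made up):

#include <stdio.h>

int main(void)
{
	unsigned long long dyn = 0x001a0040ULL; /* hypothetical raw descriptor */
	unsigned offset = dyn & 0xffff;         /* payload offset inside the record */
	unsigned size = (dyn >> 16) & 0xffff;   /* payload length in bytes */

	printf("offset=%u size=%u\n", offset, size); /* offset=64 size=26 */
	return 0;
}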
+bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt)
+{
+ return fmt->cmp == __sort__hde_cmp;
+}
+
+static struct hpp_dynamic_entry *
+__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field)
+{
+ struct hpp_dynamic_entry *hde;
+
+ hde = malloc(sizeof(*hde));
+ if (hde == NULL) {
+ pr_debug("Memory allocation failed\n");
+ return NULL;
+ }
+
+ hde->evsel = evsel;
+ hde->field = field;
+ hde->dynamic_len = 0;
+
+ hde->hpp.name = field->name;
+ hde->hpp.header = __sort__hde_header;
+ hde->hpp.width = __sort__hde_width;
+ hde->hpp.entry = __sort__hde_entry;
+ hde->hpp.color = NULL;
+
+ hde->hpp.cmp = __sort__hde_cmp;
+ hde->hpp.collapse = __sort__hde_cmp;
+ hde->hpp.sort = __sort__hde_cmp;
+
+ INIT_LIST_HEAD(&hde->hpp.list);
+ INIT_LIST_HEAD(&hde->hpp.sort_list);
+ hde->hpp.elide = false;
+ hde->hpp.len = 0;
+ hde->hpp.user_len = 0;
+
+ return hde;
+}
+
+static int parse_field_name(char *str, char **event, char **field, char **opt)
+{
+ char *event_name, *field_name, *opt_name;
+
+ event_name = str;
+ field_name = strchr(str, '.');
+
+ if (field_name) {
+ *field_name++ = '\0';
+ } else {
+ event_name = NULL;
+ field_name = str;
+ }
+
+ opt_name = strchr(field_name, '/');
+ if (opt_name)
+ *opt_name++ = '\0';
+
+ *event = event_name;
+ *field = field_name;
+ *opt = opt_name;
+
+ return 0;
+}
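
A runnable sketch of the splitting done by parse_field_name(), showing how a --sort token such as sched:sched_switch.prev_comm/raw comes apart (simplified reimplementation; the sample token is illustrative):

#include <stdio.h>
#include <string.h>

static void split(char *str, char **event, char **field, char **opt)
{
	char *dot = strchr(str, '.');
	char *slash;

	*event = dot ? str : NULL;		/* no '.' means field-only form */
	*field = dot ? dot + 1 : str;
	if (dot)
		*dot = '\0';

	slash = strchr(*field, '/');
	*opt = slash ? slash + 1 : NULL;	/* e.g. the "raw" option */
	if (slash)
		*slash = '\0';
}

int main(void)
{
	char tok[] = "sched:sched_switch.prev_comm/raw";
	char *event, *field, *opt;

	split(tok, &event, &field, &opt);
	printf("event=%s field=%s opt=%s\n", event, field, opt);
	/* prints: event=sched:sched_switch field=prev_comm opt=raw */
	return 0;
}
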
+
+/* find a matching evsel using a given event name. The event name can be:
+ * 1. '%' + event index (e.g. '%1' for first event)
+ * 2. full event name (e.g. sched:sched_switch)
+ * 3. partial event name (should not contain ':')
+ */
+static struct perf_evsel *find_evsel(struct perf_evlist *evlist, char *event_name)
+{
+ struct perf_evsel *evsel = NULL;
+ struct perf_evsel *pos;
+ bool full_name;
+
+ /* case 1 */
+ if (event_name[0] == '%') {
+ int nr = strtol(event_name+1, NULL, 0);
+
+ if (nr > evlist->nr_entries)
+ return NULL;
+
+ evsel = perf_evlist__first(evlist);
+ while (--nr > 0)
+ evsel = perf_evsel__next(evsel);
+
+ return evsel;
+ }
+
+ full_name = !!strchr(event_name, ':');
+ evlist__for_each(evlist, pos) {
+ /* case 2 */
+ if (full_name && !strcmp(pos->name, event_name))
+ return pos;
+ /* case 3 */
+ if (!full_name && strstr(pos->name, event_name)) {
+ if (evsel) {
+ pr_debug("'%s' event is ambiguous: it can be %s or %s\n",
+ event_name, evsel->name, pos->name);
+ return NULL;
+ }
+ evsel = pos;
+ }
+ }
+
+ return evsel;
+}
+
+static int __dynamic_dimension__add(struct perf_evsel *evsel,
+ struct format_field *field,
+ bool raw_trace)
+{
+ struct hpp_dynamic_entry *hde;
+
+ hde = __alloc_dynamic_entry(evsel, field);
+ if (hde == NULL)
+ return -ENOMEM;
+
+ hde->raw_trace = raw_trace;
+
+ perf_hpp__register_sort_field(&hde->hpp);
+ return 0;
+}
+
+static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace)
+{
+ int ret;
+ struct format_field *field;
+
+ field = evsel->tp_format->format.fields;
+ while (field) {
+ ret = __dynamic_dimension__add(evsel, field, raw_trace);
+ if (ret < 0)
+ return ret;
+
+ field = field->next;
+ }
+ return 0;
+}
+
+static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace)
+{
+ int ret;
+ struct perf_evsel *evsel;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+ continue;
+
+ ret = add_evsel_fields(evsel, raw_trace);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static int add_all_matching_fields(struct perf_evlist *evlist,
+ char *field_name, bool raw_trace)
+{
+ int ret = -ESRCH;
+ struct perf_evsel *evsel;
+ struct format_field *field;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+ continue;
+
+ field = pevent_find_any_field(evsel->tp_format, field_name);
+ if (field == NULL)
+ continue;
+
+ ret = __dynamic_dimension__add(evsel, field, raw_trace);
+ if (ret < 0)
+ break;
+ }
+ return ret;
+}
+
+static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok)
+{
+ char *str, *event_name, *field_name, *opt_name;
+ struct perf_evsel *evsel;
+ struct format_field *field;
+ bool raw_trace = symbol_conf.raw_trace;
+ int ret = 0;
+
+ if (evlist == NULL)
+ return -ENOENT;
+
+ str = strdup(tok);
+ if (str == NULL)
+ return -ENOMEM;
+
+ if (parse_field_name(str, &event_name, &field_name, &opt_name) < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (opt_name) {
+ if (strcmp(opt_name, "raw")) {
+ pr_debug("unsupported field option %s\n", opt_name);
+ ret = -EINVAL;
+ goto out;
+ }
+ raw_trace = true;
+ }
+
+ if (!strcmp(field_name, "trace_fields")) {
+ ret = add_all_dynamic_fields(evlist, raw_trace);
+ goto out;
+ }
+
+ if (event_name == NULL) {
+ ret = add_all_matching_fields(evlist, field_name, raw_trace);
+ goto out;
+ }
+
+ evsel = find_evsel(evlist, event_name);
+ if (evsel == NULL) {
+ pr_debug("Cannot find event: %s\n", event_name);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+ pr_debug("%s is not a tracepoint event\n", event_name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!strcmp(field_name, "*")) {
+ ret = add_evsel_fields(evsel, raw_trace);
+ } else {
+ field = pevent_find_any_field(evsel->tp_format, field_name);
+ if (field == NULL) {
+ pr_debug("Cannot find event field for %s.%s\n",
+ event_name, field_name);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ ret = __dynamic_dimension__add(evsel, field, raw_trace);
+ }
+
+out:
+ free(str);
+ return ret;
+}
+
static int __sort_dimension__add(struct sort_dimension *sd)
{
if (sd->taken)
@@ -1583,7 +2100,8 @@ int hpp_dimension__add_output(unsigned col)
return __hpp_dimension__add_output(&hpp_sort_dimensions[col]);
}
-int sort_dimension__add(const char *tok)
+static int sort_dimension__add(const char *tok,
+ struct perf_evlist *evlist __maybe_unused)
{
unsigned int i;
@@ -1664,10 +2182,13 @@ int sort_dimension__add(const char *tok)
return 0;
}
+ if (!add_dynamic_entry(evlist, tok))
+ return 0;
+
return -ESRCH;
}
-static const char *get_default_sort_order(void)
+static const char *get_default_sort_order(struct perf_evlist *evlist)
{
const char *default_sort_orders[] = {
default_sort_order,
@@ -1675,14 +2196,33 @@ static const char *get_default_sort_order(void)
default_mem_sort_order,
default_top_sort_order,
default_diff_sort_order,
+ default_tracepoint_sort_order,
};
+ bool use_trace = true;
+ struct perf_evsel *evsel;
BUG_ON(sort__mode >= ARRAY_SIZE(default_sort_orders));
+ if (evlist == NULL)
+ goto out_no_evlist;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+ use_trace = false;
+ break;
+ }
+ }
+
+ if (use_trace) {
+ sort__mode = SORT_MODE__TRACEPOINT;
+ if (symbol_conf.raw_trace)
+ return "trace_fields";
+ }
+out_no_evlist:
return default_sort_orders[sort__mode];
}
-static int setup_sort_order(void)
+static int setup_sort_order(struct perf_evlist *evlist)
{
char *new_sort_order;
@@ -1703,7 +2243,7 @@ static int setup_sort_order(void)
* because it's checked over the rest of the code.
*/
if (asprintf(&new_sort_order, "%s,%s",
- get_default_sort_order(), sort_order + 1) < 0) {
+ get_default_sort_order(evlist), sort_order + 1) < 0) {
error("Not enough memory to set up --sort");
return -ENOMEM;
}
@@ -1712,13 +2252,41 @@ static int setup_sort_order(void)
return 0;
}
-static int __setup_sorting(void)
+/*
+ * Adds the 'pre,' prefix to 'str' if 'pre' is
+ * not already part of 'str'.
+ */
+static char *prefix_if_not_in(const char *pre, char *str)
+{
+ char *n;
+
+ if (!str || strstr(str, pre))
+ return str;
+
+ if (asprintf(&n, "%s,%s", pre, str) < 0)
+ return NULL;
+
+ free(str);
+ return n;
+}
+
+static char *setup_overhead(char *keys)
+{
+ keys = prefix_if_not_in("overhead", keys);
+
+ if (symbol_conf.cumulate_callchain)
+ keys = prefix_if_not_in("overhead_children", keys);
+
+ return keys;
+}
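
prefix_if_not_in() and setup_overhead() above only prepend a key when it is absent, but note they test with strstr(), so any substring occurrence counts as present (an existing overhead_children key already satisfies the overhead check). A standalone sketch with a throwaway key string:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *prefix_if_not_in(const char *pre, char *str)
{
	char *n;

	if (!str || strstr(str, pre))
		return str;

	if (asprintf(&n, "%s,%s", pre, str) < 0)
		return NULL;

	free(str);
	return n;
}

int main(void)
{
	char *keys = prefix_if_not_in("overhead", strdup("comm,dso"));

	printf("%s\n", keys);	/* prints: overhead,comm,dso */
	free(keys);
	return 0;
}
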
+
+static int __setup_sorting(struct perf_evlist *evlist)
{
char *tmp, *tok, *str;
const char *sort_keys;
int ret = 0;
- ret = setup_sort_order();
+ ret = setup_sort_order(evlist);
if (ret)
return ret;
@@ -1732,7 +2300,7 @@ static int __setup_sorting(void)
return 0;
}
- sort_keys = get_default_sort_order();
+ sort_keys = get_default_sort_order(evlist);
}
str = strdup(sort_keys);
@@ -1741,9 +2309,20 @@ static int __setup_sorting(void)
return -ENOMEM;
}
+ /*
+ * Prepend overhead fields for backward compatibility.
+ */
+ if (!is_strict_order(field_order)) {
+ str = setup_overhead(str);
+ if (str == NULL) {
+ error("Not enough memory to setup overhead keys");
+ return -ENOMEM;
+ }
+ }
+
for (tok = strtok_r(str, ", ", &tmp);
tok; tok = strtok_r(NULL, ", ", &tmp)) {
- ret = sort_dimension__add(tok);
+ ret = sort_dimension__add(tok, evlist);
if (ret == -EINVAL) {
error("Invalid --sort key: `%s'", tok);
break;
@@ -1954,16 +2533,16 @@ out:
return ret;
}
-int setup_sorting(void)
+int setup_sorting(struct perf_evlist *evlist)
{
int err;
- err = __setup_sorting();
+ err = __setup_sorting(evlist);
if (err < 0)
return err;
if (parent_pattern != default_parent_pattern) {
- err = sort_dimension__add("parent");
+ err = sort_dimension__add("parent", evlist);
if (err < 0)
return err;
}
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 31228851e397..687bbb124428 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -18,7 +18,7 @@
#include "debug.h"
#include "header.h"
-#include "parse-options.h"
+#include <subcmd/parse-options.h>
#include "parse-events.h"
#include "hist.h"
#include "thread.h"
@@ -122,6 +122,9 @@ struct hist_entry {
struct branch_info *branch_info;
struct hists *hists;
struct mem_info *mem_info;
+ void *raw_data;
+ u32 raw_size;
+ void *trace_output;
struct callchain_root callchain[0]; /* must be last member */
};
@@ -164,6 +167,7 @@ enum sort_mode {
SORT_MODE__MEMORY,
SORT_MODE__TOP,
SORT_MODE__DIFF,
+ SORT_MODE__TRACEPOINT,
};
enum sort_type {
@@ -180,6 +184,7 @@ enum sort_type {
SORT_LOCAL_WEIGHT,
SORT_GLOBAL_WEIGHT,
SORT_TRANSACTION,
+ SORT_TRACE,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
@@ -209,8 +214,6 @@ enum sort_type {
*/
struct sort_entry {
- struct list_head list;
-
const char *se_header;
int64_t (*se_cmp)(struct hist_entry *, struct hist_entry *);
@@ -224,10 +227,11 @@ struct sort_entry {
extern struct sort_entry sort_thread;
extern struct list_head hist_entry__sort_list;
-int setup_sorting(void);
+struct perf_evlist;
+struct pevent;
+int setup_sorting(struct perf_evlist *evlist);
int setup_output_field(void);
void reset_output_field(void);
-extern int sort_dimension__add(const char *);
void sort__setup_elide(FILE *fp);
void perf_hpp__set_elide(int idx, bool elide);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 2d9d8306dbd3..2f901d15e063 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -341,3 +341,65 @@ int perf_stat_process_counter(struct perf_stat_config *config,
return 0;
}
+
+int perf_event__process_stat_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_counts_values count;
+ struct stat_event *st = &event->stat;
+ struct perf_evsel *counter;
+
+ count.val = st->val;
+ count.ena = st->ena;
+ count.run = st->run;
+
+ counter = perf_evlist__id2evsel(session->evlist, st->id);
+ if (!counter) {
+ pr_err("Failed to resolve counter for stat event.\n");
+ return -EINVAL;
+ }
+
+ *perf_counts(counter->counts, st->cpu, st->thread) = count;
+ counter->supported = true;
+ return 0;
+}
+
+size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
+{
+ struct stat_event *st = (struct stat_event *) event;
+ size_t ret;
+
+ ret = fprintf(fp, "\n... id %" PRIu64 ", cpu %d, thread %d\n",
+ st->id, st->cpu, st->thread);
+ ret += fprintf(fp, "... value %" PRIu64 ", enabled %" PRIu64 ", running %" PRIu64 "\n",
+ st->val, st->ena, st->run);
+
+ return ret;
+}
+
+size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
+{
+ struct stat_round_event *rd = (struct stat_round_event *)event;
+ size_t ret;
+
+ ret = fprintf(fp, "\n... time %" PRIu64 ", type %s\n", rd->time,
+ rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");
+
+ return ret;
+}
+
+size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
+{
+ struct perf_stat_config sc;
+ size_t ret;
+
+ perf_event__read_stat_config(&sc, &event->stat_config);
+
+ ret = fprintf(fp, "\n");
+ ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
+ ret += fprintf(fp, "... scale %d\n", sc.scale);
+ ret += fprintf(fp, "... interval %u\n", sc.interval);
+
+ return ret;
+}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index da1d11c4f8c1..086f4e128d63 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -90,4 +90,14 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist);
int perf_stat_process_counter(struct perf_stat_config *config,
struct perf_evsel *counter);
+struct perf_tool;
+union perf_event;
+struct perf_session;
+int perf_event__process_stat_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session);
+
+size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
#endif
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index fc8781de62db..7f7e072be746 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -342,22 +342,6 @@ char *rtrim(char *s)
return s;
}
-/**
- * memdup - duplicate region of memory
- * @src: memory region to duplicate
- * @len: memory region length
- */
-void *memdup(const void *src, size_t len)
-{
- void *p;
-
- p = malloc(len);
- if (p)
- memcpy(p, src, len);
-
- return p;
-}
-
char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints)
{
/*
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 475d88d0a1c9..562b8ebeae5b 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1026,8 +1026,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
curr_dso->long_name_len = dso->long_name_len;
curr_map = map__new2(start, curr_dso,
map->type);
+ dso__put(curr_dso);
if (curr_map == NULL) {
- dso__put(curr_dso);
goto out_elf_end;
}
if (adjust_kernel_syms) {
@@ -1042,7 +1042,14 @@ int dso__load_sym(struct dso *dso, struct map *map,
}
curr_dso->symtab_type = dso->symtab_type;
map_groups__insert(kmaps, curr_map);
+ /*
+ * Add it before we drop the reference to curr_map,
+ * i.e. while we still are sure to have a reference
+ * to this DSO via curr_map->dso.
+ */
dsos__add(&map->groups->machine->dsos, curr_dso);
+ /* kmaps already got it */
+ map__put(curr_map);
dso__set_loaded(curr_dso, map->type);
} else
curr_dso = curr_map->dso;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b4cc7662677e..3b2de6eb3376 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -39,6 +39,7 @@ struct symbol_conf symbol_conf = {
.cumulate_callchain = true,
.show_hist_headers = true,
.symfs = "",
+ .event_group = true,
};
static enum dso_binary_type binary_type_symtab[] = {
@@ -654,19 +655,24 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map,
struct map_groups *kmaps = map__kmaps(map);
struct map *curr_map;
struct symbol *pos;
- int count = 0, moved = 0;
+ int count = 0;
+ struct rb_root old_root = dso->symbols[map->type];
struct rb_root *root = &dso->symbols[map->type];
struct rb_node *next = rb_first(root);
if (!kmaps)
return -1;
+ *root = RB_ROOT;
+
while (next) {
char *module;
pos = rb_entry(next, struct symbol, rb_node);
next = rb_next(&pos->rb_node);
+ rb_erase_init(&pos->rb_node, &old_root);
+
module = strchr(pos->name, '\t');
if (module)
*module = '\0';
@@ -674,28 +680,21 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map,
curr_map = map_groups__find(kmaps, map->type, pos->start);
if (!curr_map || (filter && filter(curr_map, pos))) {
- rb_erase_init(&pos->rb_node, root);
symbol__delete(pos);
- } else {
- pos->start -= curr_map->start - curr_map->pgoff;
- if (pos->end)
- pos->end -= curr_map->start - curr_map->pgoff;
- if (curr_map->dso != map->dso) {
- rb_erase_init(&pos->rb_node, root);
- symbols__insert(
- &curr_map->dso->symbols[curr_map->type],
- pos);
- ++moved;
- } else {
- ++count;
- }
+ continue;
}
+
+ pos->start -= curr_map->start - curr_map->pgoff;
+ if (pos->end)
+ pos->end -= curr_map->start - curr_map->pgoff;
+ symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
+ ++count;
}
/* Symbols have been adjusted */
dso->adjust_symbols = 1;
- return count + moved;
+ return count;
}
/*
@@ -1438,9 +1437,9 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
if (lstat(dso->name, &st) < 0)
goto out;
- if (st.st_uid && (st.st_uid != geteuid())) {
+ if (!symbol_conf.force && st.st_uid && (st.st_uid != geteuid())) {
pr_warning("File %s not owned by current user or root, "
- "ignoring it.\n", dso->name);
+ "ignoring it (use -f to override).\n", dso->name);
goto out;
}
@@ -1862,24 +1861,44 @@ static void vmlinux_path__exit(void)
zfree(&vmlinux_path);
}
+static const char * const vmlinux_paths[] = {
+ "vmlinux",
+ "/boot/vmlinux"
+};
+
+static const char * const vmlinux_paths_upd[] = {
+ "/boot/vmlinux-%s",
+ "/usr/lib/debug/boot/vmlinux-%s",
+ "/lib/modules/%s/build/vmlinux",
+ "/usr/lib/debug/lib/modules/%s/vmlinux",
+ "/usr/lib/debug/boot/vmlinux-%s.debug"
+};
+
+static int vmlinux_path__add(const char *new_entry)
+{
+ vmlinux_path[vmlinux_path__nr_entries] = strdup(new_entry);
+ if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+ return -1;
+ ++vmlinux_path__nr_entries;
+
+ return 0;
+}
+
static int vmlinux_path__init(struct perf_env *env)
{
struct utsname uts;
char bf[PATH_MAX];
char *kernel_version;
+ unsigned int i;
- vmlinux_path = malloc(sizeof(char *) * 6);
+ vmlinux_path = malloc(sizeof(char *) * (ARRAY_SIZE(vmlinux_paths) +
+ ARRAY_SIZE(vmlinux_paths_upd)));
if (vmlinux_path == NULL)
return -1;
- vmlinux_path[vmlinux_path__nr_entries] = strdup("vmlinux");
- if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
- goto out_fail;
- ++vmlinux_path__nr_entries;
- vmlinux_path[vmlinux_path__nr_entries] = strdup("/boot/vmlinux");
- if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
- goto out_fail;
- ++vmlinux_path__nr_entries;
+ for (i = 0; i < ARRAY_SIZE(vmlinux_paths); i++)
+ if (vmlinux_path__add(vmlinux_paths[i]) < 0)
+ goto out_fail;
/* only try kernel version if no symfs was given */
if (symbol_conf.symfs[0] != 0)
@@ -1894,28 +1913,11 @@ static int vmlinux_path__init(struct perf_env *env)
kernel_version = uts.release;
}
- snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", kernel_version);
- vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
- if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
- goto out_fail;
- ++vmlinux_path__nr_entries;
- snprintf(bf, sizeof(bf), "/usr/lib/debug/boot/vmlinux-%s",
- kernel_version);
- vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
- if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
- goto out_fail;
- ++vmlinux_path__nr_entries;
- snprintf(bf, sizeof(bf), "/lib/modules/%s/build/vmlinux", kernel_version);
- vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
- if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
- goto out_fail;
- ++vmlinux_path__nr_entries;
- snprintf(bf, sizeof(bf), "/usr/lib/debug/lib/modules/%s/vmlinux",
- kernel_version);
- vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
- if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
- goto out_fail;
- ++vmlinux_path__nr_entries;
+ for (i = 0; i < ARRAY_SIZE(vmlinux_paths_upd); i++) {
+ snprintf(bf, sizeof(bf), vmlinux_paths_upd[i], kernel_version);
+ if (vmlinux_path__add(bf) < 0)
+ goto out_fail;
+ }
return 0;
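
The rewrite above replaces five copy-pasted strdup blocks with two path tables; entries in vmlinux_paths_upd are printf templates that get the kernel version substituted. The pattern in isolation, with a hypothetical version string:

#include <stdio.h>

/* printf templates: %s is replaced by the running kernel version */
static const char * const paths_upd[] = {
	"/boot/vmlinux-%s",
	"/usr/lib/debug/boot/vmlinux-%s",
	"/lib/modules/%s/build/vmlinux",
};

int main(void)
{
	char bf[256];
	unsigned int i;

	for (i = 0; i < sizeof(paths_upd) / sizeof(paths_upd[0]); i++) {
		snprintf(bf, sizeof(bf), paths_upd[i], "4.4.0");
		puts(bf);	/* one candidate vmlinux path per line */
	}
	return 0;
}
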
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 40073c60b83d..ccd1caa40e11 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -84,6 +84,7 @@ struct symbol_conf {
unsigned short priv_size;
unsigned short nr_events;
bool try_vmlinux_path,
+ force,
ignore_vmlinux,
ignore_vmlinux_buildid,
show_kernel_path,
@@ -107,7 +108,9 @@ struct symbol_conf {
show_hist_headers,
branch_callstack,
has_filter,
- show_ref_callgraph;
+ show_ref_callgraph,
+ hide_unresolved,
+ raw_trace;
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
diff --git a/tools/perf/util/term.c b/tools/perf/util/term.c
new file mode 100644
index 000000000000..90b47d8aa19c
--- /dev/null
+++ b/tools/perf/util/term.c
@@ -0,0 +1,35 @@
+#include "util.h"
+
+void get_term_dimensions(struct winsize *ws)
+{
+ char *s = getenv("LINES");
+
+ if (s != NULL) {
+ ws->ws_row = atoi(s);
+ s = getenv("COLUMNS");
+ if (s != NULL) {
+ ws->ws_col = atoi(s);
+ if (ws->ws_row && ws->ws_col)
+ return;
+ }
+ }
+#ifdef TIOCGWINSZ
+ if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
+ ws->ws_row && ws->ws_col)
+ return;
+#endif
+ ws->ws_row = 25;
+ ws->ws_col = 80;
+}
+
+void set_term_quiet_input(struct termios *old)
+{
+ struct termios tc;
+
+ tcgetattr(0, old);
+ tc = *old;
+ tc.c_lflag &= ~(ICANON | ECHO);
+ tc.c_cc[VMIN] = 0;
+ tc.c_cc[VTIME] = 0;
+ tcsetattr(0, TCSANOW, &tc);
+}
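
A usage sketch for the two helpers now housed in term.c: read the terminal size, switch stdin to quiet (no echo, non-blocking) input, then restore the saved state before exiting. It assumes the prototypes from term.h and omits error handling:

#include <stdio.h>
#include <termios.h>
#include "term.h"	/* get_term_dimensions(), set_term_quiet_input() */

int main(void)
{
	struct termios saved;
	struct winsize ws;

	get_term_dimensions(&ws);
	printf("%dx%d\n", ws.ws_col, ws.ws_row);

	set_term_quiet_input(&saved);	/* no echo, reads never block */
	/* ... poll stdin for single keystrokes here ... */
	tcsetattr(0, TCSANOW, &saved);	/* restore original settings */
	return 0;
}
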
diff --git a/tools/perf/util/term.h b/tools/perf/util/term.h
new file mode 100644
index 000000000000..2c06a61846a1
--- /dev/null
+++ b/tools/perf/util/term.h
@@ -0,0 +1,10 @@
+#ifndef __PERF_TERM_H
+#define __PERF_TERM_H
+
+struct termios;
+struct winsize;
+
+void get_term_dimensions(struct winsize *ws);
+void set_term_quiet_input(struct termios *old);
+
+#endif /* __PERF_TERM_H */
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 0a9ae8014729..dfd00c6dad6e 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -19,8 +19,10 @@ int thread__init_map_groups(struct thread *thread, struct machine *machine)
thread->mg = map_groups__new(machine);
} else {
leader = __machine__findnew_thread(machine, pid, pid);
- if (leader)
+ if (leader) {
thread->mg = map_groups__get(leader->mg);
+ thread__put(leader);
+ }
}
return thread->mg ? 0 : -1;
@@ -53,7 +55,7 @@ struct thread *thread__new(pid_t pid, pid_t tid)
goto err_thread;
list_add(&comm->list, &thread->comm_list);
- atomic_set(&thread->refcnt, 0);
+ atomic_set(&thread->refcnt, 1);
RB_CLEAR_NODE(&thread->rb_node);
}
@@ -95,6 +97,10 @@ struct thread *thread__get(struct thread *thread)
void thread__put(struct thread *thread)
{
if (thread && atomic_dec_and_test(&thread->refcnt)) {
+ /*
+ * Remove it from the dead_threads list, as the last
+ * reference is gone.
+ */
list_del_init(&thread->node);
thread__delete(thread);
}
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index 6ec3c5ca438f..08afc6909953 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -13,6 +13,7 @@
#include "thread_map.h"
#include "util.h"
#include "debug.h"
+#include "event.h"
/* Skip "." and ".." directories */
static int filter(const struct dirent *dir)
@@ -304,6 +305,7 @@ out:
out_free_threads:
zfree(&threads);
+ strlist__delete(slist);
goto out;
}
@@ -408,3 +410,29 @@ void thread_map__read_comms(struct thread_map *threads)
for (i = 0; i < threads->nr; ++i)
comm_init(threads, i);
}
+
+static void thread_map__copy_event(struct thread_map *threads,
+ struct thread_map_event *event)
+{
+ unsigned i;
+
+ threads->nr = (int) event->nr;
+
+ for (i = 0; i < event->nr; i++) {
+ thread_map__set_pid(threads, i, (pid_t) event->entries[i].pid);
+ threads->map[i].comm = strndup(event->entries[i].comm, 16);
+ }
+
+ atomic_set(&threads->refcnt, 1);
+}
+
+struct thread_map *thread_map__new_event(struct thread_map_event *event)
+{
+ struct thread_map *threads;
+
+ threads = thread_map__alloc(event->nr);
+ if (threads)
+ thread_map__copy_event(threads, event);
+
+ return threads;
+}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index af679d8a50f8..85e4c7c4fbde 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -16,11 +16,14 @@ struct thread_map {
struct thread_map_data map[];
};
+struct thread_map_event;
+
struct thread_map *thread_map__new_dummy(void);
struct thread_map *thread_map__new_by_pid(pid_t pid);
struct thread_map *thread_map__new_by_tid(pid_t tid);
struct thread_map *thread_map__new_by_uid(uid_t uid);
struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
+struct thread_map *thread_map__new_event(struct thread_map_event *event);
struct thread_map *thread_map__get(struct thread_map *map);
void thread_map__put(struct thread_map *map);
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index cab8cc24831b..55de4cffcd4e 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -50,12 +50,18 @@ struct perf_tool {
throttle,
unthrottle;
event_attr_op attr;
+ event_attr_op event_update;
event_op2 tracing_data;
event_oe finished_round;
event_op2 build_id,
id_index,
auxtrace_info,
- auxtrace_error;
+ auxtrace_error,
+ thread_map,
+ cpu_map,
+ stat_config,
+ stat,
+ stat_round;
event_op3 auxtrace;
bool ordered_events;
bool ordering_requires_timestamps;
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index b85ee55cca0c..bce5b1dac268 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -65,6 +65,7 @@ int tracing_data_put(struct tracing_data *tdata);
struct addr_location;
struct perf_session;
+struct perf_stat_config;
struct scripting_ops {
const char *name;
@@ -75,6 +76,9 @@ struct scripting_ops {
struct perf_sample *sample,
struct perf_evsel *evsel,
struct addr_location *al);
+ void (*process_stat)(struct perf_stat_config *config,
+ struct perf_evsel *evsel, u64 tstamp);
+ void (*process_stat_interval)(u64 tstamp);
int (*generate_script) (struct pevent *pevent, const char *outfile);
};
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 2dcfe9a7c8d0..cf5e250bc78e 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -11,6 +11,7 @@
#include <linux/types.h>
#include "event.h"
#include "perf_regs.h"
+#include "callchain.h"
static char *debuginfo_path;
@@ -52,25 +53,28 @@ static int report_module(u64 ip, struct unwind_info *ui)
return __report_module(&al, ip, ui);
}
+/*
+ * Store all entries in the entries array; we will
+ * process them after the unwind finishes.
+ */
static int entry(u64 ip, struct unwind_info *ui)
{
- struct unwind_entry e;
+ struct unwind_entry *e = &ui->entries[ui->idx++];
struct addr_location al;
if (__report_module(&al, ip, ui))
return -1;
- e.ip = ip;
- e.map = al.map;
- e.sym = al.sym;
+ e->ip = ip;
+ e->map = al.map;
+ e->sym = al.sym;
pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
al.sym ? al.sym->name : "''",
ip,
al.map ? al.map->map_ip(al.map, ip) : (u64) 0);
-
- return ui->cb(&e, ui->arg);
+ return 0;
}
static pid_t next_thread(Dwfl *dwfl, void *arg, void **thread_argp)
@@ -92,6 +96,16 @@ static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr,
thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
MAP__FUNCTION, addr, &al);
if (!al.map) {
+ /*
+ * We've seen cases (softice) where the DWARF unwinder went
+ * through non-executable mmaps, which we need to look up
+ * in the MAP__VARIABLE tree.
+ */
+ thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
+ MAP__VARIABLE, addr, &al);
+ }
+
+ if (!al.map) {
pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
return -1;
}
@@ -168,7 +182,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct perf_sample *data,
int max_stack)
{
- struct unwind_info ui = {
+ struct unwind_info *ui, ui_buf = {
.sample = data,
.thread = thread,
.machine = thread->mg->machine,
@@ -177,35 +191,54 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
.max_stack = max_stack,
};
Dwarf_Word ip;
- int err = -EINVAL;
+ int err = -EINVAL, i;
if (!data->user_regs.regs)
return -EINVAL;
- ui.dwfl = dwfl_begin(&offline_callbacks);
- if (!ui.dwfl)
+ ui = zalloc(sizeof(ui_buf) + sizeof(ui_buf.entries[0]) * max_stack);
+ if (!ui)
+ return -ENOMEM;
+
+ *ui = ui_buf;
+
+ ui->dwfl = dwfl_begin(&offline_callbacks);
+ if (!ui->dwfl)
goto out;
err = perf_reg_value(&ip, &data->user_regs, PERF_REG_IP);
if (err)
goto out;
- err = report_module(ip, &ui);
+ err = report_module(ip, ui);
if (err)
goto out;
- if (!dwfl_attach_state(ui.dwfl, EM_NONE, thread->tid, &callbacks, &ui))
+ if (!dwfl_attach_state(ui->dwfl, EM_NONE, thread->tid, &callbacks, ui))
goto out;
- err = dwfl_getthread_frames(ui.dwfl, thread->tid, frame_callback, &ui);
+ err = dwfl_getthread_frames(ui->dwfl, thread->tid, frame_callback, ui);
- if (err && !ui.max_stack)
+ if (err && !ui->max_stack)
err = 0;
+ /*
+ * Display what we got based on the order setup.
+ */
+ for (i = 0; i < ui->idx && !err; i++) {
+ int j = i;
+
+ if (callchain_param.order == ORDER_CALLER)
+ j = ui->idx - i - 1;
+
+ err = ui->entries[j].ip ? ui->cb(&ui->entries[j], ui->arg) : 0;
+ }
+
out:
if (err)
pr_debug("unwind: failed with '%s'\n", dwfl_errmsg(-1));
- dwfl_end(ui.dwfl);
+ dwfl_end(ui->dwfl);
+ free(ui);
return 0;
}
diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h
index 417a1426f3ad..58328669ed16 100644
--- a/tools/perf/util/unwind-libdw.h
+++ b/tools/perf/util/unwind-libdw.h
@@ -16,6 +16,8 @@ struct unwind_info {
unwind_entry_cb_t cb;
void *arg;
int max_stack;
+ int idx;
+ struct unwind_entry entries[];
};
#endif /* __PERF_UNWIND_LIBDW_H */
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index c83832b555e5..ee7e372297e5 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -319,6 +319,15 @@ static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
MAP__FUNCTION, ip, &al);
+ if (!al.map) {
+ /*
+ * We've seen cases (softice) where the DWARF unwinder went
+ * through non-executable mmaps, which we need to look up
+ * in the MAP__VARIABLE tree.
+ */
+ thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
+ MAP__VARIABLE, ip, &al);
+ }
return al.map;
}
@@ -416,20 +425,19 @@ get_proc_name(unw_addr_space_t __maybe_unused as,
static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
unw_word_t *data)
{
- struct addr_location al;
+ struct map *map;
ssize_t size;
- thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
- MAP__FUNCTION, addr, &al);
- if (!al.map) {
+ map = find_map(addr, ui);
+ if (!map) {
pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
return -1;
}
- if (!al.map->dso)
+ if (!map->dso)
return -1;
- size = dso__data_read_addr(al.map->dso, al.map, ui->machine,
+ size = dso__data_read_addr(map->dso, map, ui->machine,
addr, (u8 *) data, sizeof(*data));
return !(size == sizeof(*data));
@@ -614,23 +622,48 @@ void unwind__finish_access(struct thread *thread)
static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
void *arg, int max_stack)
{
+ u64 val;
+ unw_word_t ips[max_stack];
unw_addr_space_t addr_space;
unw_cursor_t c;
- int ret;
-
- addr_space = thread__priv(ui->thread);
- if (addr_space == NULL)
- return -1;
+ int ret, i = 0;
- ret = unw_init_remote(&c, addr_space, ui);
+ ret = perf_reg_value(&val, &ui->sample->user_regs, PERF_REG_IP);
if (ret)
- display_error(ret);
+ return ret;
- while (!ret && (unw_step(&c) > 0) && max_stack--) {
- unw_word_t ip;
+ ips[i++] = (unw_word_t) val;
- unw_get_reg(&c, UNW_REG_IP, &ip);
- ret = ip ? entry(ip, ui->thread, cb, arg) : 0;
+ /*
+ * If we need more than one entry, do the DWARF
+ * unwind itself.
+ */
+ if (max_stack - 1 > 0) {
+ addr_space = thread__priv(ui->thread);
+ if (addr_space == NULL)
+ return -1;
+
+ ret = unw_init_remote(&c, addr_space, ui);
+ if (ret)
+ display_error(ret);
+
+ while (!ret && (unw_step(&c) > 0) && i < max_stack) {
+ unw_get_reg(&c, UNW_REG_IP, &ips[i]);
+ ++i;
+ }
+
+ max_stack = i;
+ }
+
+ /*
+ * Display what we got based on the order setup.
+ */
+ for (i = 0; i < max_stack && !ret; i++) {
+ int j = i;
+
+ if (callchain_param.order == ORDER_CALLER)
+ j = max_stack - i - 1;
+ ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0;
}
return ret;
@@ -640,24 +673,17 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct thread *thread,
struct perf_sample *data, int max_stack)
{
- u64 ip;
struct unwind_info ui = {
.sample = data,
.thread = thread,
.machine = thread->mg->machine,
};
- int ret;
if (!data->user_regs.regs)
return -EINVAL;
- ret = perf_reg_value(&ip, &data->user_regs, PERF_REG_IP);
- if (ret)
- return ret;
-
- ret = entry(ip, thread, cb, arg);
- if (ret)
- return -ENOMEM;
+ if (max_stack <= 0)
+ return -EINVAL;
- return --max_stack > 0 ? get_entries(&ui, cb, arg, max_stack) : 0;
+ return get_entries(&ui, cb, arg, max_stack);
}
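
With both unwinders now collecting the whole stack before emitting entries, honoring ORDER_CALLER versus ORDER_CALLEE becomes a pure index remap, as in the j = max_stack - i - 1 line above. A minimal illustration with sample addresses and a stand-in for callchain_param.order:

#include <stdio.h>
#include <stdbool.h>

int main(void)
{
	unsigned long ips[] = { 0x400100, 0x400200, 0x400300 };	/* innermost first */
	int n = 3, i;
	bool order_caller = true;	/* stand-in for callchain_param.order */

	for (i = 0; i < n; i++) {
		int j = order_caller ? n - i - 1 : i;

		printf("0x%lx\n", ips[j]);	/* caller order prints outermost first */
	}
	return 0;
}
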
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 47b1e36c7ea0..88b8f8d21f58 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -16,12 +16,15 @@
#include <linux/kernel.h>
#include <unistd.h>
#include "callchain.h"
+#include "strlist.h"
+#include <subcmd/exec-cmd.h>
struct callchain_param callchain_param = {
.mode = CHAIN_GRAPH_ABS,
.min_percent = 0.5,
.order = ORDER_CALLEE,
- .key = CCKEY_FUNCTION
+ .key = CCKEY_FUNCTION,
+ .value = CCVAL_PERCENT,
};
/*
@@ -351,41 +354,8 @@ void sighandler_dump_stack(int sig)
{
psignal(sig, "perf");
dump_stack();
- exit(sig);
-}
-
-void get_term_dimensions(struct winsize *ws)
-{
- char *s = getenv("LINES");
-
- if (s != NULL) {
- ws->ws_row = atoi(s);
- s = getenv("COLUMNS");
- if (s != NULL) {
- ws->ws_col = atoi(s);
- if (ws->ws_row && ws->ws_col)
- return;
- }
- }
-#ifdef TIOCGWINSZ
- if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
- ws->ws_row && ws->ws_col)
- return;
-#endif
- ws->ws_row = 25;
- ws->ws_col = 80;
-}
-
-void set_term_quiet_input(struct termios *old)
-{
- struct termios tc;
-
- tcgetattr(0, old);
- tc = *old;
- tc.c_lflag &= ~(ICANON | ECHO);
- tc.c_cc[VMIN] = 0;
- tc.c_cc[VTIME] = 0;
- tcsetattr(0, TCSANOW, &tc);
+ signal(sig, SIG_DFL);
+ raise(sig);
}
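
The new epilogue re-raises the signal with the default disposition restored, instead of calling exit(sig), so the parent shell observes a real death-by-signal (WIFSIGNALED) rather than an ordinary exit code. The idiom in isolation; the fprintf is for demonstration only and is not async-signal-safe:

#include <signal.h>
#include <stdio.h>

static void handler(int sig)
{
	fprintf(stderr, "cleaning up on signal %d\n", sig);
	signal(sig, SIG_DFL);	/* restore the default disposition */
	raise(sig);		/* die for real, preserving the signal status */
}

int main(void)
{
	signal(SIGTERM, handler);
	raise(SIGTERM);		/* process terminates with SIGTERM, not exit(15) */
	return 0;
}
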
int parse_nsec_time(const char *str, u64 *ptime)
@@ -695,3 +665,28 @@ fetch_kernel_version(unsigned int *puint, char *str,
*puint = (version << 16) + (patchlevel << 8) + sublevel;
return 0;
}
+
+const char *perf_tip(const char *dirpath)
+{
+ struct strlist *tips;
+ struct str_node *node;
+ char *tip = NULL;
+ struct strlist_config conf = {
+ .dirname = system_path(dirpath),
+ };
+
+ tips = strlist__new("tips.txt", &conf);
+ if (tips == NULL || strlist__nr_entries(tips) == 1) {
+ tip = (char *)"Cannot find tips.txt file";
+ goto out;
+ }
+
+ node = strlist__entry(tips, random() % strlist__nr_entries(tips));
+ if (asprintf(&tip, "Tip: %s", node->s) < 0)
+ tip = (char *)"Tip: get more memory! ;-)";
+
+out:
+ strlist__delete(tips);
+
+ return tip;
+}
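
perf_tip() loads tips.txt through strlist and returns one random entry. A library-free sketch of the same idea, with a hypothetical file path and an arbitrary cap on line count; it assumes the caller seeded random():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Return a random line of 'path', or NULL if it cannot be read. */
static char *random_line(const char *path)
{
	char buf[256], *lines[128];
	int n = 0;
	FILE *fp = fopen(path, "r");

	if (!fp)
		return NULL;

	while (n < 128 && fgets(buf, sizeof(buf), fp)) {
		buf[strcspn(buf, "\n")] = '\0';
		lines[n++] = strdup(buf);
	}
	fclose(fp);

	return n ? lines[random() % n] : NULL;	/* leaks the rest; demo only */
}

int main(void)
{
	char *tip = random_line("tips.txt");

	printf("Tip: %s\n", tip ? tip : "get more memory! ;-)");
	return 0;
}
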
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index dcc659017976..fe915e616f9b 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -53,6 +53,7 @@
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
+#include <term.h>
#include <errno.h>
#include <limits.h>
#include <sys/param.h>
@@ -150,12 +151,6 @@ extern void set_warning_routine(void (*routine)(const char *err, va_list params)
extern int prefixcmp(const char *str, const char *prefix);
extern void set_buildid_dir(const char *dir);
-static inline const char *skip_prefix(const char *str, const char *prefix)
-{
- size_t len = strlen(prefix);
- return strncmp(str, prefix, len) ? NULL : str + len;
-}
-
#ifdef __GLIBC_PREREQ
#if __GLIBC_PREREQ(2, 1)
#define HAVE_STRCHRNUL
@@ -186,14 +181,6 @@ static inline void *zalloc(size_t size)
#define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
-static inline int has_extension(const char *filename, const char *ext)
-{
- size_t len = strlen(filename);
- size_t extlen = strlen(ext);
-
- return len > extlen && !memcmp(filename + len - extlen, ext, extlen);
-}
-
/* Sane ctype - no locale, and works with signed chars */
#undef isascii
#undef isspace
@@ -282,9 +269,6 @@ void sighandler_dump_stack(int sig);
extern unsigned int page_size;
extern int cacheline_size;
-void get_term_dimensions(struct winsize *ws);
-void set_term_quiet_input(struct termios *old);
-
struct parse_tag {
char tag;
int mult;
@@ -358,4 +342,6 @@ int fetch_kernel_version(unsigned int *puint,
#define KVER_FMT "%d.%d.%d"
#define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x)
+const char *perf_tip(const char *dirpath);
+
#endif /* GIT_COMPAT_UTIL_H */
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index d8e4b20b6d54..0dac7e05a6ac 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1173,9 +1173,9 @@ dump_nhm_platform_info(void)
unsigned long long msr;
unsigned int ratio;
- get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr);
+ get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
- fprintf(stderr, "cpu%d: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
+ fprintf(stderr, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 40) & 0xFF;
fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n",
@@ -1807,7 +1807,7 @@ void check_permissions()
*
* MSR_SMI_COUNT 0x00000034
*
- * MSR_NHM_PLATFORM_INFO 0x000000ce
+ * MSR_PLATFORM_INFO 0x000000ce
* MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2
*
* MSR_PKG_C3_RESIDENCY 0x000003f8
@@ -1876,7 +1876,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
- get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr);
+ get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
base_ratio = (msr >> 8) & 0xFF;
base_hz = base_ratio * bclk * 1000000;
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 40ab4476c80a..51cf8256c6cd 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -420,8 +420,7 @@ static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
static int nfit_test0_alloc(struct nfit_test *t)
{
- size_t nfit_size = sizeof(struct acpi_table_nfit)
- + sizeof(struct acpi_nfit_system_address) * NUM_SPA
+ size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
+ sizeof(struct acpi_nfit_memory_map) * NUM_MEM
+ sizeof(struct acpi_nfit_control_region) * NUM_DCR
+ sizeof(struct acpi_nfit_data_region) * NUM_BDW
@@ -471,8 +470,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
static int nfit_test1_alloc(struct nfit_test *t)
{
- size_t nfit_size = sizeof(struct acpi_table_nfit)
- + sizeof(struct acpi_nfit_system_address)
+ size_t nfit_size = sizeof(struct acpi_nfit_system_address)
+ sizeof(struct acpi_nfit_memory_map)
+ sizeof(struct acpi_nfit_control_region);
@@ -488,39 +486,24 @@ static int nfit_test1_alloc(struct nfit_test *t)
return 0;
}
-static void nfit_test_init_header(struct acpi_table_nfit *nfit, size_t size)
-{
- memcpy(nfit->header.signature, ACPI_SIG_NFIT, 4);
- nfit->header.length = size;
- nfit->header.revision = 1;
- memcpy(nfit->header.oem_id, "LIBND", 6);
- memcpy(nfit->header.oem_table_id, "TEST", 5);
- nfit->header.oem_revision = 1;
- memcpy(nfit->header.asl_compiler_id, "TST", 4);
- nfit->header.asl_compiler_revision = 1;
-}
-
static void nfit_test0_setup(struct nfit_test *t)
{
struct nvdimm_bus_descriptor *nd_desc;
struct acpi_nfit_desc *acpi_desc;
struct acpi_nfit_memory_map *memdev;
void *nfit_buf = t->nfit_buf;
- size_t size = t->nfit_size;
struct acpi_nfit_system_address *spa;
struct acpi_nfit_control_region *dcr;
struct acpi_nfit_data_region *bdw;
struct acpi_nfit_flush_address *flush;
unsigned int offset;
- nfit_test_init_header(nfit_buf, size);
-
/*
* spa0 (interleave first half of dimm0 and dimm1, note storage
* does not actually alias the related block-data-window
* regions)
*/
- spa = nfit_buf + sizeof(struct acpi_table_nfit);
+ spa = nfit_buf;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
@@ -533,7 +516,7 @@ static void nfit_test0_setup(struct nfit_test *t)
* does not actually alias the related block-data-window
* regions)
*/
- spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa);
+ spa = nfit_buf + sizeof(*spa);
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
@@ -542,7 +525,7 @@ static void nfit_test0_setup(struct nfit_test *t)
spa->length = SPA1_SIZE;
/* spa2 (dcr0) dimm0 */
- spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 2;
+ spa = nfit_buf + sizeof(*spa) * 2;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
@@ -551,7 +534,7 @@ static void nfit_test0_setup(struct nfit_test *t)
spa->length = DCR_SIZE;
/* spa3 (dcr1) dimm1 */
- spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 3;
+ spa = nfit_buf + sizeof(*spa) * 3;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
@@ -560,7 +543,7 @@ static void nfit_test0_setup(struct nfit_test *t)
spa->length = DCR_SIZE;
/* spa4 (dcr2) dimm2 */
- spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 4;
+ spa = nfit_buf + sizeof(*spa) * 4;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
@@ -569,7 +552,7 @@ static void nfit_test0_setup(struct nfit_test *t)
spa->length = DCR_SIZE;
/* spa5 (dcr3) dimm3 */
- spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 5;
+ spa = nfit_buf + sizeof(*spa) * 5;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
@@ -578,7 +561,7 @@ static void nfit_test0_setup(struct nfit_test *t)
spa->length = DCR_SIZE;
/* spa6 (bdw for dcr0) dimm0 */
- spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 6;
+ spa = nfit_buf + sizeof(*spa) * 6;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
@@ -587,7 +570,7 @@ static void nfit_test0_setup(struct nfit_test *t)
spa->length = DIMM_SIZE;
/* spa7 (bdw for dcr1) dimm1 */
- spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 7;
+ spa = nfit_buf + sizeof(*spa) * 7;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
@@ -596,7 +579,7 @@ static void nfit_test0_setup(struct nfit_test *t)
spa->length = DIMM_SIZE;
/* spa8 (bdw for dcr2) dimm2 */
- spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 8;
+ spa = nfit_buf + sizeof(*spa) * 8;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
@@ -605,7 +588,7 @@ static void nfit_test0_setup(struct nfit_test *t)
spa->length = DIMM_SIZE;
/* spa9 (bdw for dcr3) dimm3 */
- spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 9;
+ spa = nfit_buf + sizeof(*spa) * 9;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
@@ -613,7 +596,7 @@ static void nfit_test0_setup(struct nfit_test *t)
spa->address = t->dimm_dma[3];
spa->length = DIMM_SIZE;
- offset = sizeof(struct acpi_table_nfit) + sizeof(*spa) * 10;
+ offset = sizeof(*spa) * 10;
/* mem-region0 (spa0, dimm0) */
memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1100,15 +1083,13 @@ static void nfit_test0_setup(struct nfit_test *t)
static void nfit_test1_setup(struct nfit_test *t)
{
- size_t size = t->nfit_size, offset;
+ size_t offset;
void *nfit_buf = t->nfit_buf;
struct acpi_nfit_memory_map *memdev;
struct acpi_nfit_control_region *dcr;
struct acpi_nfit_system_address *spa;
- nfit_test_init_header(nfit_buf, size);
-
- offset = sizeof(struct acpi_table_nfit);
+ offset = 0;
/* spa0 (flat range with no bdw aliasing) */
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
diff --git a/tools/testing/selftests/futex/README b/tools/testing/selftests/futex/README
index 3224a049b196..0558bb9ce0a6 100644
--- a/tools/testing/selftests/futex/README
+++ b/tools/testing/selftests/futex/README
@@ -27,7 +27,7 @@ o The build system shall remain as simple as possible, avoiding any archive or
o Where possible, any helper functions or other package-wide code shall be
implemented in header files, avoiding the need to compile intermediate object
files.
-o External dependendencies shall remain as minimal as possible. Currently gcc
+o External dependencies shall remain as minimal as possible. Currently gcc
and glibc are the only dependencies.
o Tests return 0 for success and < 0 for failure.
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 5236e073919d..0f80eefb0bfd 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -38,8 +38,6 @@
#
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-grace=120
-
T=/tmp/kvm-test-1-run.sh.$$
trap 'rm -rf $T' 0
touch $T
@@ -152,7 +150,7 @@ fi
qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`"
# Generate architecture-specific and interaction-specific qemu arguments
-qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$builddir/console.log"`"
+qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$resdir/console.log"`"
# Generate qemu -append arguments
qemu_append="`identify_qemu_append "$QEMU"`"
@@ -168,7 +166,7 @@ then
touch $resdir/buildonly
exit 0
fi
-echo "NOTE: $QEMU either did not run or was interactive" > $builddir/console.log
+echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
echo $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
( $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append "$qemu_append $boot_args"; echo $? > $resdir/qemu-retval ) &
qemu_pid=$!
@@ -214,7 +212,7 @@ then
else
break
fi
- if test $kruntime -ge $((seconds + grace))
+ if test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
then
echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1
kill -KILL $qemu_pid
@@ -224,6 +222,5 @@ then
done
fi
-cp $builddir/console.log $resdir
parse-torture.sh $resdir/console.log $title
parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index f6483609ebc2..4a431767f77a 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -42,6 +42,7 @@ TORTURE_DEFCONFIG=defconfig
TORTURE_BOOT_IMAGE=""
TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
TORTURE_KMAKE_ARG=""
+TORTURE_SHUTDOWN_GRACE=180
TORTURE_SUITE=rcu
resdir=""
configs=""
@@ -149,6 +150,11 @@ do
resdir=$2
shift
;;
+ --shutdown-grace)
+ checkarg --shutdown-grace "(seconds)" "$#" "$2" '^[0-9]*$' '^error'
+ TORTURE_SHUTDOWN_GRACE=$2
+ shift
+ ;;
--torture)
checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\)$' '^--'
TORTURE_SUITE=$2
@@ -266,6 +272,7 @@ TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
+TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE
TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE
if ! test -e $resdir
then
@@ -307,10 +314,10 @@ awk < $T/cfgcpu.pack \
}
# Dump out the scripting required to run one test batch.
-function dump(first, pastlast)
+function dump(first, pastlast, batchnum)
{
- print "echo ----Start batch: `date`";
- print "echo ----Start batch: `date` >> " rd "/log";
+ print "echo ----Start batch " batchnum ": `date`";
+ print "echo ----Start batch " batchnum ": `date` >> " rd "/log";
jn=1
for (j = first; j < pastlast; j++) {
builddir=KVM "/b" jn
@@ -371,25 +378,28 @@ END {
njobs = i;
nc = ncpus;
first = 0;
+ batchnum = 1;
# Each pass through the following loop considers one test.
for (i = 0; i < njobs; i++) {
if (ncpus == 0) {
# Sequential test specified, each test its own batch.
- dump(i, i + 1);
+ dump(i, i + 1, batchnum);
first = i;
+ batchnum++;
} else if (nc < cpus[i] && i != 0) {
# Out of CPUs, dump out a batch.
- dump(first, i);
+ dump(first, i, batchnum);
first = i;
nc = ncpus;
+ batchnum++;
}
# Account for the CPUs needed by the current test.
nc -= cpus[i];
}
# Dump the last batch.
if (ncpus != 0)
- dump(first, i);
+ dump(first, i, batchnum);
}' >> $T/script
cat << ___EOF___ >> $T/script
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index d8f35cf116be..844787a0d7be 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -24,9 +24,6 @@
#
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-T=/tmp/abat-chk-badness.sh.$$
-trap 'rm -f $T' 0
-
file="$1"
title="$2"
@@ -36,9 +33,41 @@ if grep -Pq '\x00' < $file
then
print_warning Console output contains nul bytes, old qemu still running?
fi
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|Stall ended before state dump start' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $T
-if test -s $T
+egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|Stall ended before state dump start' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags
+if test -s $1.diags
then
print_warning Assertion failure in $file $title
- cat $T
+ # cat $1.diags
+ summary=""
+ n_badness=`grep -c Badness $1`
+ if test "$n_badness" -ne 0
+ then
+ summary="$summary Badness: $n_badness"
+ fi
+ n_warn=`grep -v 'Warning: unable to open an initial console' $1 | egrep -c 'WARNING:|Warn'`
+ if test "$n_warn" -ne 0
+ then
+ summary="$summary Warnings: $n_warn"
+ fi
+ n_bugs=`egrep -c 'BUG|Oops:' $1`
+ if test "$n_bugs" -ne 0
+ then
+ summary="$summary Bugs: $n_bugs"
+ fi
+ n_calltrace=`grep -c 'Call Trace:' $1`
+ if test "$n_calltrace" -ne 0
+ then
+ summary="$summary Call Traces: $n_calltrace"
+ fi
+ n_lockdep=`grep -c =========== $1`
+ if test "$n_badness" -ne 0
+ then
+ summary="$summary lockdep: $n_badness"
+ fi
+ n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|Stall ended before state dump start' $1`
+ if test "$n_stalls" -ne 0
+ then
+ summary="$summary Stalls: $n_stalls"
+ fi
+ print_warning Summary: $summary
fi
diff --git a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
index 9ef33a743b73..24396ae8355b 100644
--- a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
+++ b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
@@ -20,7 +20,6 @@ CONFIG_PROVE_RCU
CONFIG_NO_HZ_FULL_SYSIDLE
CONFIG_RCU_NOCB_CPU
-CONFIG_RCU_USER_QS
Meaningless for TINY_RCU.
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index 657f3a035488..4e2b1893d40d 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -72,10 +72,6 @@ CONFIG_RCU_TORTURE_TEST_RUNNABLE
Always used in KVM testing.
-CONFIG_RCU_USER_QS
-
- Redundant with CONFIG_NO_HZ_FULL.
-
CONFIG_PREEMPT_RCU
CONFIG_TREE_RCU
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index e38cc54942db..882fe83a3554 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -492,6 +492,9 @@ TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
pid_t parent = getppid();
int fd;
void *map1, *map2;
+ int page_size = sysconf(_SC_PAGESIZE);
+
+ ASSERT_LT(0, page_size);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret);
@@ -504,16 +507,16 @@ TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
EXPECT_EQ(parent, syscall(__NR_getppid));
map1 = (void *)syscall(sysno,
- NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, PAGE_SIZE);
+ NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
EXPECT_NE(MAP_FAILED, map1);
/* mmap2() should never return. */
map2 = (void *)syscall(sysno,
- NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
+ NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
EXPECT_EQ(MAP_FAILED, map2);
/* The test failed, so clean up the resources. */
- munmap(map1, PAGE_SIZE);
- munmap(map2, PAGE_SIZE);
+ munmap(map1, page_size);
+ munmap(map2, page_size);
close(fd);
}
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
index 627ec7425f78..fd88e3025bed 100644
--- a/tools/testing/selftests/timers/clocksource-switch.c
+++ b/tools/testing/selftests/timers/clocksource-switch.c
@@ -97,7 +97,7 @@ int get_cur_clocksource(char *buf, size_t size)
int change_clocksource(char *clocksource)
{
int fd;
- size_t size;
+ ssize_t size;
fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_WRONLY);
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 0a3da64638ce..4db7d5691ba7 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -110,4 +110,10 @@ static inline void free_page(unsigned long addr)
(void) (&_min1 == &_min2); \
_min1 < _min2 ? _min1 : _min2; })
+/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
+#define list_add_tail(a, b) do {} while (0)
+#define list_del(a) do {} while (0)
+#define list_for_each_entry(a, b, c) while (0)
+/* end of stubs */
+
#endif /* KERNEL_H */
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index a3e07016a440..ee125e714053 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -3,12 +3,6 @@
#include <linux/scatterlist.h>
#include <linux/kernel.h>
-/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
-#define list_add_tail(a, b) do {} while (0)
-#define list_del(a) do {} while (0)
-#define list_for_each_entry(a, b, c) while (0)
-/* end of stubs */
-
struct virtio_device {
void *dev;
u64 features;
diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h
index 806d683ab107..57a6964a1e35 100644
--- a/tools/virtio/linux/virtio_config.h
+++ b/tools/virtio/linux/virtio_config.h
@@ -40,33 +40,39 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev,
#define virtio_has_feature(dev, feature) \
(__virtio_test_bit((dev), feature))
+static inline bool virtio_is_little_endian(struct virtio_device *vdev)
+{
+ return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) ||
+ virtio_legacy_is_little_endian();
+}
+
+/* Memory accessors */
static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val)
{
- return __virtio16_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+ return __virtio16_to_cpu(virtio_is_little_endian(vdev), val);
}
static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val)
{
- return __cpu_to_virtio16(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+ return __cpu_to_virtio16(virtio_is_little_endian(vdev), val);
}
static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val)
{
- return __virtio32_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+ return __virtio32_to_cpu(virtio_is_little_endian(vdev), val);
}
static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val)
{
- return __cpu_to_virtio32(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+ return __cpu_to_virtio32(virtio_is_little_endian(vdev), val);
}
static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val)
{
- return __virtio64_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+ return __virtio64_to_cpu(virtio_is_little_endian(vdev), val);
}
static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
{
- return __cpu_to_virtio64(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+ return __cpu_to_virtio64(virtio_is_little_endian(vdev), val);
}
-
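
All six accessors above now funnel through a single predicate: a device is treated as little-endian if it negotiated VIRTIO_F_VERSION_1, or if the legacy transport is little-endian anyway. A toy model of that decision with stub types (the real helpers live in linux/virtio_byteorder.h, and the non-LE leg mirrors their big-endian fallback):

#include <endian.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct vdev { bool version_1; bool legacy_le; };

static uint16_t toy_virtio16_to_cpu(const struct vdev *v, uint16_t val)
{
	bool little_endian = v->version_1 || v->legacy_le;

	/* modern or legacy-LE devices store LE; otherwise treat as BE */
	return little_endian ? le16toh(val) : be16toh(val);
}

int main(void)
{
	struct vdev modern = { .version_1 = true };

	printf("0x%04x\n", toy_virtio16_to_cpu(&modern, htole16(0x1234)));
	return 0;
}
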
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index bcf5ec760eb9..5a6016224bb9 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -128,6 +128,7 @@ static const char * const page_flag_names[] = {
[KPF_THP] = "t:thp",
[KPF_BALLOON] = "o:balloon",
[KPF_ZERO_PAGE] = "z:zero_page",
+ [KPF_IDLE] = "i:idle_page",
[KPF_RESERVED] = "r:reserved",
[KPF_MLOCKED] = "m:mlocked",
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 21a0ab2d8919..69bca185c471 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -221,17 +221,23 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
kvm_timer_update_state(vcpu);
/*
- * If we enter the guest with the virtual input level to the VGIC
- * asserted, then we have already told the VGIC what we need to, and
- * we don't need to exit from the guest until the guest deactivates
- * the already injected interrupt, so therefore we should set the
- * hardware active state to prevent unnecessary exits from the guest.
- *
- * Conversely, if the virtual input level is deasserted, then always
- * clear the hardware active state to ensure that hardware interrupts
- * from the timer triggers a guest exit.
- */
- if (timer->irq.level)
+ * If we enter the guest with the virtual input level to the VGIC
+ * asserted, then we have already told the VGIC what we need to, and
+ * we don't need to exit from the guest until the guest deactivates
+ * the already injected interrupt, so we should set the
+ * hardware active state to prevent unnecessary exits from the guest.
+ *
+ * Also, if we enter the guest with the virtual timer interrupt active,
+ * then it must be active on the physical distributor, because we set
+ * the HW bit and the guest must be able to deactivate the virtual and
+ * physical interrupt at the same time.
+ *
+ * Conversely, if the virtual input level is deasserted and the virtual
+ * interrupt is not active, then always clear the hardware active state
+ * to ensure that hardware interrupts from the timer trigger a guest
+ * exit.
+ */
+ if (timer->irq.level || kvm_vgic_map_is_active(vcpu, timer->map))
phys_active = true;
else
phys_active = false;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 533538385d5d..7a2f449bd85d 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1096,6 +1096,27 @@ static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu)
vgic_set_lr(vcpu, lr_nr, vlr);
}
+static bool dist_active_irq(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
+}
+
+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map)
+{
+ int i;
+
+ for (i = 0; i < vcpu->arch.vgic_cpu.nr_lr; i++) {
+ struct vgic_lr vlr = vgic_get_lr(vcpu, i);
+
+ if (vlr.irq == map->virt_irq && vlr.state & LR_STATE_ACTIVE)
+ return true;
+ }
+
+ return vgic_irq_is_active(vcpu, map->virt_irq);
+}
+
/*
* An interrupt may have been disabled after being made pending on the
* CPU interface (the classic case is a timer running while we're
@@ -1248,7 +1269,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
* may have been serviced from another vcpu. In all cases,
* move along.
*/
- if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu))
+ if (!kvm_vgic_vcpu_pending_irq(vcpu) && !dist_active_irq(vcpu))
goto epilog;
/* SGIs */
@@ -1396,25 +1417,13 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- struct irq_phys_map *map;
- bool phys_active;
bool level_pending;
- int ret;
if (!(vlr.state & LR_HW))
return false;
- map = vgic_irq_map_search(vcpu, vlr.irq);
- BUG_ON(!map);
-
- ret = irq_get_irqchip_state(map->irq,
- IRQCHIP_STATE_ACTIVE,
- &phys_active);
-
- WARN_ON(ret);
-
- if (phys_active)
- return 0;
+ if (vlr.state & LR_STATE_ACTIVE)
+ return false;
spin_lock(&dist->lock);
level_pending = process_queued_irq(vcpu, lr, vlr);
@@ -1479,17 +1488,6 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
}
-int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- if (!irqchip_in_kernel(vcpu->kvm))
- return 0;
-
- return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
-}
-
-
void vgic_kick_vcpus(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;